externals: Update dynarmic to latest

Updates dynarmic to its latest commit. Includes a fix for argument limits while compiling with Clang 12.
Merge pull request #5121 from bunnei/optimize-core-timing
2021-02-18 14:44:49 -05:00 · 2021-02-16 13:17:22 -08:00 · 2021-02-16 22:52:35 +08:00 · 2021-02-15 20:31:16 -08:00 · 2021-02-15 19:35:56 -05:00 · 2021-02-15 21:34:13 -03:00
185 changed files with 5227 additions and 3946 deletions
--- a/dist/qt_themes/colorful_dark/icons/index.theme
+++ b/dist/qt_themes/colorful_dark/icons/index.theme
@@ -1,7 +1,7 @@
 [Icon Theme]
 Name=colorful_dark
 Comment=Colorful theme (Dark style)
-Inherits=default
+Inherits=colorful
 Directories=16x16
 
 [16x16]
--- a/dist/qt_themes/colorful_midnight_blue/icons/index.theme
+++ b/dist/qt_themes/colorful_midnight_blue/icons/index.theme
@@ -1,7 +1,7 @@
 [Icon Theme]
 Name=colorful_midnight_blue
 Comment=Colorful theme (Midnight Blue style)
-Inherits=default
+Inherits=colorful
 Directories=16x16

 [16x16]
--- a/dist/qt_themes/qdarkstyle_midnight_blue/style.qss
+++ b/dist/qt_themes/qdarkstyle_midnight_blue/style.qss
@@ -1257,10 +1257,6 @@ QComboBox::item:alternate {
  background: #19232D;
 }

-QComboBox::item:checked {
-  font-weight: bold;
-}
-
 QComboBox::item:selected {
  border: 0px solid transparent;
 }
--- a/dist/yuzu.bmp
+++ b/dist/yuzu.bmp
--- a/externals/dynarmic
+++ b/externals/dynarmic
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -15,6 +15,8 @@ add_library(audio_core STATIC
    command_generator.cpp
    command_generator.h
    common.h
+    delay_line.cpp
+    delay_line.h
    effect_context.cpp
    effect_context.h
    info_updater.cpp
--- a/src/audio_core/command_generator.cpp
+++ b/src/audio_core/command_generator.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <cmath>
+#include <numbers>
 #include "audio_core/algorithm/interpolate.h"
 #include "audio_core/command_generator.h"
 #include "audio_core/effect_context.h"
@@ -13,6 +15,20 @@ namespace AudioCore {
 namespace {
 constexpr std::size_t MIX_BUFFER_SIZE = 0x3f00;
 constexpr std::size_t SCALED_MIX_BUFFER_SIZE = MIX_BUFFER_SIZE << 15ULL;
+using DelayLineTimes = std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT>;
+
+constexpr DelayLineTimes FDN_MIN_DELAY_LINE_TIMES{5.0f, 6.0f, 13.0f, 14.0f};
+constexpr DelayLineTimes FDN_MAX_DELAY_LINE_TIMES{45.704f, 82.782f, 149.94f, 271.58f};
+constexpr DelayLineTimes DECAY0_MAX_DELAY_LINE_TIMES{17.0f, 13.0f, 9.0f, 7.0f};
+constexpr DelayLineTimes DECAY1_MAX_DELAY_LINE_TIMES{19.0f, 11.0f, 10.0f, 6.0f};
+constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_TAP_TIMES{
+    0.017136f, 0.059154f, 0.161733f, 0.390186f, 0.425262f, 0.455411f, 0.689737f,
+    0.745910f, 0.833844f, 0.859502f, 0.000000f, 0.075024f, 0.168788f, 0.299901f,
+    0.337443f, 0.371903f, 0.599011f, 0.716741f, 0.817859f, 0.851664f};
+constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_GAIN{
+    0.67096f, 0.61027f, 1.0f,     0.35680f, 0.68361f, 0.65978f, 0.51939f,
+    0.24712f, 0.45945f, 0.45021f, 0.64196f, 0.54879f, 0.92925f, 0.38270f,
+    0.72867f, 0.69794f, 0.5464f,  0.24563f, 0.45214f, 0.44042f};

 template <std::size_t N>
 void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) {
@@ -65,6 +81,154 @@ s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) {
    }
 }

+float Pow10(float x) { // Returns 10^x, limited so the result always lies in [0, 1]
+    if (x >= 0.0f) { // Non-negative exponents saturate at 1.0
+        return 1.0f;
+    } else if (x <= -5.3f) { // Exponents at or below -5.3 are flushed to exactly 0.0
+        return 0.0f;
+    }
+    return std::pow(10.0f, x);
+}
+
+float SinD(float degrees) { // Sine of an angle given in degrees
+    return std::sin(degrees * std::numbers::pi_v<float> / 180.0f); // Degrees -> radians first
+}
+
+float CosD(float degrees) { // Cosine of an angle given in degrees
+    return std::cos(degrees * std::numbers::pi_v<float> / 180.0f); // Degrees -> radians first
+}
+
+float ToFloat(s32 sample) { // Converts an integer sample to float by dividing by 65536
+    return static_cast<float>(sample) / 65536.f;
+}
+
+s32 ToS32(float sample) { // Scales by 65536, clamps to [-2^23, 2^23 - 1], then truncates
+    constexpr auto min = -8388608.0f; // -2^23
+    constexpr auto max = 8388607.f; // 2^23 - 1
+    float rescaled_sample = sample * 65536.0f; // Same factor ToFloat divides by
+    if (rescaled_sample < min) {
+        rescaled_sample = min;
+    }
+    if (rescaled_sample > max) {
+        rescaled_sample = max;
+    }
+    return static_cast<s32>(rescaled_sample);
+}
+
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_1CH{0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_2CH{0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
+                                                           1, 1, 1, 0, 0, 0, 0, 1, 1, 1};
+
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_4CH{0, 0, 0, 1, 1, 1, 1, 2, 2, 2,
+                                                           1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
+
+constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1, 2, 2, 2,
+                                                           1, 1, 1, 0, 0, 0, 0, 3, 3, 3};
+
+template <std::size_t CHANNEL_COUNT>
+void ApplyReverbGeneric(I3dl2ReverbState& state,
+                        const std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT>& input,
+                        const std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT>& output,
+                        s32 sample_count) {
+
+    auto GetTapLookup = []() {
+        if constexpr (CHANNEL_COUNT == 1) {
+            return REVERB_TAP_INDEX_1CH;
+        } else if constexpr (CHANNEL_COUNT == 2) {
+            return REVERB_TAP_INDEX_2CH;
+        } else if constexpr (CHANNEL_COUNT == 4) {
+            return REVERB_TAP_INDEX_4CH;
+        } else if constexpr (CHANNEL_COUNT == 6) {
+            return REVERB_TAP_INDEX_6CH;
+        }
+    };
+
+    const auto& tap_index_lut = GetTapLookup();
+    for (s32 sample = 0; sample < sample_count; sample++) {
+        std::array<f32, CHANNEL_COUNT> out_samples{};
+        std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> fsamp{};
+        std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> mixed{};
+        std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> osamp{};
+
+        // Mix everything into a single sample
+        s32 temp_mixed_sample = 0;
+        for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
+            temp_mixed_sample += input[i][sample];
+        }
+        const auto current_sample = ToFloat(temp_mixed_sample);
+        const auto early_tap = state.early_delay_line.TapOut(state.early_to_late_taps);
+
+        for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_TAPS; i++) {
+            const auto tapped_samp =
+                state.early_delay_line.TapOut(state.early_tap_steps[i]) * EARLY_GAIN[i];
+            out_samples[tap_index_lut[i]] += tapped_samp;
+
+            if constexpr (CHANNEL_COUNT == 6) {
+                // handle lfe
+                out_samples[5] += tapped_samp;
+            }
+        }
+
+        state.lowpass_0 = current_sample * state.lowpass_2 + state.lowpass_0 * state.lowpass_1;
+        state.early_delay_line.Tick(state.lowpass_0);
+
+        for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
+            out_samples[i] *= state.early_gain;
+        }
+
+        // Two-channel output seems to apply a late gain, so the last filter sample must be kept
+        f32 filter{};
+        for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+            filter = state.fdn_delay_line[i].GetOutputSample();
+            const auto computed = filter * state.lpf_coefficients[0][i] + state.shelf_filter[i];
+            state.shelf_filter[i] =
+                filter * state.lpf_coefficients[1][i] + computed * state.lpf_coefficients[2][i];
+            fsamp[i] = computed;
+        }
+
+        // Mixing matrix
+        mixed[0] = fsamp[1] + fsamp[2];
+        mixed[1] = -fsamp[0] - fsamp[3];
+        mixed[2] = fsamp[0] - fsamp[3];
+        mixed[3] = fsamp[1] - fsamp[2];
+
+        if constexpr (CHANNEL_COUNT == 2) {
+            for (auto& mix : mixed) {
+                mix *= (filter * state.late_gain);
+            }
+        }
+
+        for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+            const auto late = early_tap * state.late_gain;
+            osamp[i] = state.decay_delay_line0[i].Tick(late + mixed[i]);
+            osamp[i] = state.decay_delay_line1[i].Tick(osamp[i]);
+            state.fdn_delay_line[i].Tick(osamp[i]);
+        }
+
+        if constexpr (CHANNEL_COUNT == 1) {
+            output[0][sample] = ToS32(state.dry_gain * ToFloat(input[0][sample]) +
+                                      (out_samples[0] + osamp[0] + osamp[1]));
+        } else if constexpr (CHANNEL_COUNT == 2 || CHANNEL_COUNT == 4) {
+            for (std::size_t i = 0; i < CHANNEL_COUNT; i++) {
+                output[i][sample] =
+                    ToS32(state.dry_gain * ToFloat(input[i][sample]) + (out_samples[i] + osamp[i]));
+            }
+        } else if constexpr (CHANNEL_COUNT == 6) {
+            const auto temp_center = state.center_delay_line.Tick(0.5f * (osamp[2] - osamp[3]));
+            for (std::size_t i = 0; i < 4; i++) {
+                output[i][sample] =
+                    ToS32(state.dry_gain * ToFloat(input[i][sample]) + (out_samples[i] + osamp[i]));
+            }
+            output[4][sample] =
+                ToS32(state.dry_gain * ToFloat(input[4][sample]) + (out_samples[4] + temp_center));
+            output[5][sample] =
+                ToS32(state.dry_gain * ToFloat(input[5][sample]) + (out_samples[5] + osamp[3]));
+        }
+    }
+}
+
 } // namespace

 CommandGenerator::CommandGenerator(AudioCommon::AudioRendererParameter& worker_params_,
@@ -271,11 +435,10 @@ void CommandGenerator::GenerateBiquadFilterCommandForVoice(ServerVoiceInfo& voic
        }

        // Generate biquad filter
-        //        GenerateBiquadFilterCommand(mix_buffer_count, biquad_filter,
-        //        dsp_state.biquad_filter_state,
-        //                                    mix_buffer_count + channel, mix_buffer_count +
-        //                                    channel, worker_params.sample_count,
-        //                                    voice_info.GetInParams().node_id);
+        // GenerateBiquadFilterCommand(mix_buffer_count, biquad_filter,
+        // dsp_state.biquad_filter_state,
+        //                            mix_buffer_count + channel, mix_buffer_count + channel,
+        //                            worker_params.sample_count, voice_info.GetInParams().node_id);
    }
 }

@@ -376,21 +539,54 @@ void CommandGenerator::GenerateEffectCommand(ServerMixInfo& mix_info) {

 void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, EffectBase* info,
                                                        bool enabled) {
-    if (!enabled) {
+    auto* reverb = dynamic_cast<EffectI3dl2Reverb*>(info);
+    const auto& params = reverb->GetParams();
+    auto& state = reverb->GetState();
+    const auto channel_count = params.channel_count;
+
+    if (channel_count != 1 && channel_count != 2 && channel_count != 4 && channel_count != 6) {
        return;
    }
-    const auto& params = dynamic_cast<EffectI3dl2Reverb*>(info)->GetParams();
-    const auto channel_count = params.channel_count;
+
+    std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT> input{};
+    std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT> output{};
+
+    const auto status = params.status;
    for (s32 i = 0; i < channel_count; i++) {
-        // TODO(ogniK): Actually implement reverb
-        /*
-        if (params.input[i] != params.output[i]) {
-            const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]);
-            auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]);
-            ApplyMix<1>(output, input, 32768, worker_params.sample_count);
-        }*/
-        auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]);
-        std::memset(output, 0, worker_params.sample_count * sizeof(s32));
+        input[i] = GetMixBuffer(mix_buffer_offset + params.input[i]);
+        output[i] = GetMixBuffer(mix_buffer_offset + params.output[i]);
+    }
+
+    if (enabled) {
+        if (status == ParameterStatus::Initialized) {
+            InitializeI3dl2Reverb(reverb->GetParams(), state, info->GetWorkBuffer());
+        } else if (status == ParameterStatus::Updating) {
+            UpdateI3dl2Reverb(reverb->GetParams(), state, false);
+        }
+    }
+
+    if (enabled) {
+        switch (channel_count) {
+        case 1:
+            ApplyReverbGeneric<1>(state, input, output, worker_params.sample_count);
+            break;
+        case 2:
+            ApplyReverbGeneric<2>(state, input, output, worker_params.sample_count);
+            break;
+        case 4:
+            ApplyReverbGeneric<4>(state, input, output, worker_params.sample_count);
+            break;
+        case 6:
+            ApplyReverbGeneric<6>(state, input, output, worker_params.sample_count);
+            break;
+        }
+    } else {
+        for (s32 i = 0; i < channel_count; i++) {
+            // Only copy if the buffer input and output do not match!
+            if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) {
+                std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32));
+            }
+        }
    }
 }

@@ -528,6 +724,133 @@ s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3
    return sample_count;
 }

+void CommandGenerator::InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
+                                             std::vector<u8>& work_buffer) {
+    // Reset state
+    state.lowpass_0 = 0.0f;
+    state.lowpass_1 = 0.0f;
+    state.lowpass_2 = 0.0f;
+
+    state.early_delay_line.Reset();
+    state.early_tap_steps.fill(0);
+    state.early_gain = 0.0f;
+    state.late_gain = 0.0f;
+    state.early_to_late_taps = 0;
+    for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+        state.fdn_delay_line[i].Reset();
+        state.decay_delay_line0[i].Reset();
+        state.decay_delay_line1[i].Reset();
+    }
+    state.last_reverb_echo = 0.0f;
+    state.center_delay_line.Reset();
+    for (auto& coef : state.lpf_coefficients) {
+        coef.fill(0.0f);
+    }
+    state.shelf_filter.fill(0.0f);
+    state.dry_gain = 0.0f;
+
+    const auto sample_rate = info.sample_rate / 1000;
+    f32* work_buffer_ptr = reinterpret_cast<f32*>(work_buffer.data());
+
+    s32 delay_samples{};
+    for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+        delay_samples =
+            AudioCommon::CalculateDelaySamples(sample_rate, FDN_MAX_DELAY_LINE_TIMES[i]);
+        state.fdn_delay_line[i].Initialize(delay_samples, work_buffer_ptr);
+        work_buffer_ptr += delay_samples + 1;
+
+        delay_samples =
+            AudioCommon::CalculateDelaySamples(sample_rate, DECAY0_MAX_DELAY_LINE_TIMES[i]);
+        state.decay_delay_line0[i].Initialize(delay_samples, 0.0f, work_buffer_ptr);
+        work_buffer_ptr += delay_samples + 1;
+
+        delay_samples =
+            AudioCommon::CalculateDelaySamples(sample_rate, DECAY1_MAX_DELAY_LINE_TIMES[i]);
+        state.decay_delay_line1[i].Initialize(delay_samples, 0.0f, work_buffer_ptr);
+        work_buffer_ptr += delay_samples + 1;
+    }
+    delay_samples = AudioCommon::CalculateDelaySamples(sample_rate, 5.0f);
+    state.center_delay_line.Initialize(delay_samples, work_buffer_ptr);
+    work_buffer_ptr += delay_samples + 1;
+
+    delay_samples = AudioCommon::CalculateDelaySamples(sample_rate, 400.0f);
+    state.early_delay_line.Initialize(delay_samples, work_buffer_ptr);
+
+    UpdateI3dl2Reverb(info, state, true);
+}
+
+void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
+                                         bool should_clear) {
+
+    state.dry_gain = info.dry_gain;
+    state.shelf_filter.fill(0.0f);
+    state.lowpass_0 = 0.0f;
+    state.early_gain = Pow10(std::min(info.room + info.reflection, 5000.0f) / 2000.0f);
+    state.late_gain = Pow10(std::min(info.room + info.reverb, 5000.0f) / 2000.0f);
+
+    const auto sample_rate = info.sample_rate / 1000;
+    const f32 hf_gain = Pow10(info.room_hf / 2000.0f);
+    if (hf_gain >= 1.0f) {
+        state.lowpass_2 = 1.0f;
+        state.lowpass_1 = 0.0f;
+    } else {
+        const auto a = 1.0f - hf_gain;
+        const auto b = 2.0f * (1.0f - hf_gain * CosD(256.0f * info.hf_reference /
+                                                     static_cast<f32>(info.sample_rate)));
+        const auto c = std::sqrt(b * b - 4.0f * a * a);
+
+        state.lowpass_1 = (b - c) / (2.0f * a);
+        state.lowpass_2 = 1.0f - state.lowpass_1;
+    }
+    state.early_to_late_taps = AudioCommon::CalculateDelaySamples(
+        sample_rate, 1000.0f * (info.reflection_delay + info.reverb_delay));
+
+    state.last_reverb_echo = 0.6f * info.diffusion * 0.01f;
+    for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+        const auto length =
+            FDN_MIN_DELAY_LINE_TIMES[i] +
+            (info.density / 100.0f) * (FDN_MAX_DELAY_LINE_TIMES[i] - FDN_MIN_DELAY_LINE_TIMES[i]);
+        state.fdn_delay_line[i].SetDelay(AudioCommon::CalculateDelaySamples(sample_rate, length));
+
+        const auto delay_sample_counts = state.fdn_delay_line[i].GetDelay() +
+                                         state.decay_delay_line0[i].GetDelay() +
+                                         state.decay_delay_line1[i].GetDelay();
+
+        float a = (-60.0f * static_cast<f32>(delay_sample_counts)) /
+                  (info.decay_time * static_cast<f32>(info.sample_rate));
+        float b = a / info.hf_decay_ratio;
+        float c = CosD(128.0f * 0.5f * info.hf_reference / static_cast<f32>(info.sample_rate)) /
+                  SinD(128.0f * 0.5f * info.hf_reference / static_cast<f32>(info.sample_rate));
+        float d = Pow10((b - a) / 40.0f);
+        float e = Pow10((b + a) / 40.0f) * 0.7071f;
+
+        state.lpf_coefficients[0][i] = e * ((d * c) + 1.0f) / (c + d);
+        state.lpf_coefficients[1][i] = e * (1.0f - (d * c)) / (c + d);
+        state.lpf_coefficients[2][i] = (c - d) / (c + d);
+
+        state.decay_delay_line0[i].SetCoefficient(state.last_reverb_echo);
+        state.decay_delay_line1[i].SetCoefficient(-0.9f * state.last_reverb_echo);
+    }
+
+    if (should_clear) {
+        for (std::size_t i = 0; i < AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT; i++) {
+            state.fdn_delay_line[i].Clear();
+            state.decay_delay_line0[i].Clear();
+            state.decay_delay_line1[i].Clear();
+        }
+        state.early_delay_line.Clear();
+        state.center_delay_line.Clear();
+    }
+
+    const auto max_early_delay = state.early_delay_line.GetMaxDelay();
+    const auto reflection_time = 1000.0f * (0.0098f * info.reverb_delay + 0.02f);
+    for (std::size_t tap = 0; tap < AudioCommon::I3DL2REVERB_TAPS; tap++) {
+        const auto length = AudioCommon::CalculateDelaySamples(
+            sample_rate, 1000.0f * info.reflection_delay + reflection_time * EARLY_TAP_TIMES[tap]);
+        state.early_tap_steps[tap] = std::min(length, max_early_delay);
+    }
+}
+
 void CommandGenerator::GenerateVolumeRampCommand(float last_volume, float current_volume,
                                                 s32 channel, s32 node_id) {
    const auto last = static_cast<s32>(last_volume * 32768.0f);
--- a/src/audio_core/command_generator.h
+++ b/src/audio_core/command_generator.h
@@ -21,6 +21,8 @@ class ServerMixInfo;
 class EffectContext;
 class EffectBase;
 struct AuxInfoDSP;
+struct I3dl2ReverbParams;
+struct I3dl2ReverbState;
 using MixVolumeBuffer = std::array<float, AudioCommon::MAX_MIX_BUFFERS>;

 class CommandGenerator {
@@ -80,6 +82,9 @@ private:
    s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data,
                      u32 sample_count, u32 read_offset, u32 read_count);

+    void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state,
+                               std::vector<u8>& work_buffer);
+    void UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, bool should_clear);
    // DSP Code
    s32 DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count,
                    s32 channel, std::size_t mix_offset);
--- a/src/audio_core/common.h
+++ b/src/audio_core/common.h
@@ -33,6 +33,29 @@ constexpr std::size_t TEMP_MIX_BASE_SIZE = 0x3f00; // TODO(ogniK): Work out this
 // and our const ends up being 0x3f04, the 4 bytes are most
 // likely the sample history
 constexpr std::size_t TOTAL_TEMP_MIX_SIZE = TEMP_MIX_BASE_SIZE + AudioCommon::MAX_SAMPLE_HISTORY;
+constexpr f32 I3DL2REVERB_MAX_LEVEL = 5000.0f;
+constexpr f32 I3DL2REVERB_MIN_REFLECTION_DURATION = 0.02f;
+constexpr std::size_t I3DL2REVERB_TAPS = 20;
+constexpr std::size_t I3DL2REVERB_DELAY_LINE_COUNT = 4;
+using Fractional = s32;
+
+template <typename T>
+constexpr Fractional ToFractional(T x) { // Scales x by 0x4000 (2^14) and casts to Fractional
+    return static_cast<Fractional>(x * static_cast<T>(0x4000));
+}
+
+constexpr Fractional MultiplyFractional(Fractional lhs, Fractional rhs) {
+    return static_cast<Fractional>(static_cast<s64>(lhs) * rhs >> 14); // Widen to s64, then >> 14
+}
+
+constexpr s32 FractionalToFixed(Fractional x) { // 14 fractional bits -> integer, rounding half up
+    const auto round_up = (x >> 13) & 1; // Bit 13 is the 0.5 place: contribute 1, not 1 << 13
+    return static_cast<s32>(x >> 14) + round_up;
+}
+
+constexpr s32 CalculateDelaySamples(s32 sample_rate_khz, float time) { // time is presumably in ms
+    return FractionalToFixed(MultiplyFractional(ToFractional(sample_rate_khz), ToFractional(time)));
+}

 static constexpr u32 VersionFromRevision(u32_le rev) {
    // "REV7" -> 7
--- a/src/audio_core/delay_line.cpp
+++ b/src/audio_core/delay_line.cpp
@@ -0,0 +1,104 @@
+#include <cstring>
+#include "audio_core/delay_line.h"
+
+namespace AudioCore {
+DelayLineBase::DelayLineBase() = default;
+DelayLineBase::~DelayLineBase() = default;
+
+void DelayLineBase::Initialize(s32 max_delay_, float* src_buffer) { // Binds the line to src_buffer
+    buffer = src_buffer; // Storage is supplied by the caller; nothing is allocated here
+    buffer_end = buffer + max_delay_; // Last slot (inclusive); Tick writes it before wrapping
+    max_delay = max_delay_;
+    output = buffer; // Read cursor starts at the first slot
+    SetDelay(max_delay_); // Places the write cursor max_delay_ slots ahead of the read cursor
+    Clear();
+}
+
+void DelayLineBase::SetDelay(s32 new_delay) {
+    if (max_delay < new_delay) { // Requests longer than the capacity are silently ignored
+        return;
+    }
+    delay = new_delay; // Write cursor below: new_delay slots past the read cursor, mod max_delay + 1
+    input = (buffer + ((output - buffer) + new_delay) % (max_delay + 1));
+}
+
+s32 DelayLineBase::GetDelay() const {
+    return delay;
+}
+
+s32 DelayLineBase::GetMaxDelay() const {
+    return max_delay;
+}
+
+f32 DelayLineBase::TapOut(s32 last_sample) { // Reads last_sample + 1 slots behind the write cursor
+    const float* ptr = input - (last_sample + 1);
+    if (ptr < buffer) { // Stepped before the start of storage: wrap around once
+        ptr += (max_delay + 1);
+    }
+
+    return *ptr;
+}
+
+f32 DelayLineBase::Tick(f32 sample) { // Stores one sample and returns the one at the read cursor
+    *(input++) = sample;
+    const auto out_sample = *(output++);
+
+    if (buffer_end < input) { // buffer_end is itself a valid slot, so wrap only once past it
+        input = buffer;
+    }
+
+    if (buffer_end < output) { // Same wrap rule for the read cursor
+        output = buffer;
+    }
+
+    return out_sample;
+}
+
+float* DelayLineBase::GetInput() {
+    return input;
+}
+
+const float* DelayLineBase::GetInput() const {
+    return input;
+}
+
+f32 DelayLineBase::GetOutputSample() const {
+    return *output;
+}
+
+void DelayLineBase::Clear() { // Zeroes all max_delay + 1 slots, i.e. [buffer, buffer_end] inclusive
+    std::memset(buffer, 0, sizeof(float) * (buffer != nullptr ? max_delay + 1 : 0));
+}
+
+void DelayLineBase::Reset() { // Detaches the storage and zeroes the bookkeeping fields
+    buffer = nullptr; // The pointed-to memory itself is not modified or freed here
+    buffer_end = nullptr;
+    max_delay = 0;
+    input = nullptr;
+    output = nullptr;
+    delay = 0;
+}
+
+DelayLineAllPass::DelayLineAllPass() = default;
+DelayLineAllPass::~DelayLineAllPass() = default;
+
+void DelayLineAllPass::Initialize(u32 delay_, float coeffcient_, f32* src_buffer) {
+    DelayLineBase::Initialize(delay_, src_buffer);
+    SetCoefficient(coeffcient_);
+}
+
+void DelayLineAllPass::SetCoefficient(float coeffcient_) {
+    coefficient = coeffcient_;
+}
+
+f32 DelayLineAllPass::Tick(f32 sample) { // One filter step built on the base delay line
+    const auto temp = sample - coefficient * *output; // Subtract the scaled sample at the read cursor
+    return coefficient * temp + DelayLineBase::Tick(temp); // Scaled temp plus the delayed sample
+}
+
+void DelayLineAllPass::Reset() {
+    coefficient = 0.0f;
+    DelayLineBase::Reset();
+}
+
+} // namespace AudioCore
--- a/src/audio_core/delay_line.h
+++ b/src/audio_core/delay_line.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include "common/common_types.h"
+
+namespace AudioCore {
+
+class DelayLineBase {
+public:
+    DelayLineBase();
+    ~DelayLineBase();
+
+    void Initialize(s32 max_delay_, float* src_buffer);
+    void SetDelay(s32 new_delay);
+    s32 GetDelay() const;
+    s32 GetMaxDelay() const;
+    f32 TapOut(s32 last_sample);
+    f32 Tick(f32 sample);
+    float* GetInput();
+    const float* GetInput() const;
+    f32 GetOutputSample() const;
+    void Clear();
+    void Reset();
+
+protected:
+    float* buffer{nullptr};
+    float* buffer_end{nullptr};
+    s32 max_delay{};
+    float* input{nullptr};
+    float* output{nullptr};
+    s32 delay{};
+};
+
+class DelayLineAllPass final : public DelayLineBase {
+public:
+    DelayLineAllPass();
+    ~DelayLineAllPass();
+
+    void Initialize(u32 delay, float coeffcient_, f32* src_buffer);
+    void SetCoefficient(float coeffcient_);
+    f32 Tick(f32 sample);
+    void Reset();
+
+private:
+    float coefficient{};
+};
+} // namespace AudioCore
--- a/src/audio_core/effect_context.cpp
+++ b/src/audio_core/effect_context.cpp
@@ -90,6 +90,14 @@ s32 EffectBase::GetProcessingOrder() const {
    return processing_order;
 }

+std::vector<u8>& EffectBase::GetWorkBuffer() {
+    return work_buffer;
+}
+
+const std::vector<u8>& EffectBase::GetWorkBuffer() const {
+    return work_buffer;
+}
+
 EffectI3dl2Reverb::EffectI3dl2Reverb() : EffectGeneric(EffectType::I3dl2Reverb) {}
 EffectI3dl2Reverb::~EffectI3dl2Reverb() = default;

@@ -117,6 +125,12 @@ void EffectI3dl2Reverb::Update(EffectInfo::InParams& in_params) {
        usage = UsageState::Initialized;
        params.status = ParameterStatus::Initialized;
        skipped = in_params.buffer_address == 0 || in_params.buffer_size == 0;
+        if (!skipped) {
+            auto& cur_work_buffer = GetWorkBuffer();
+            // Has two buffers internally
+            cur_work_buffer.resize(in_params.buffer_size * 2);
+            std::fill(cur_work_buffer.begin(), cur_work_buffer.end(), 0);
+        }
    }
 }

@@ -129,6 +143,14 @@ void EffectI3dl2Reverb::UpdateForCommandGeneration() {
    GetParams().status = ParameterStatus::Updated;
 }

+I3dl2ReverbState& EffectI3dl2Reverb::GetState() {
+    return state;
+}
+
+const I3dl2ReverbState& EffectI3dl2Reverb::GetState() const {
+    return state;
+}
+
 EffectBiquadFilter::EffectBiquadFilter() : EffectGeneric(EffectType::BiquadFilter) {}
 EffectBiquadFilter::~EffectBiquadFilter() = default;

--- a/src/audio_core/effect_context.h
+++ b/src/audio_core/effect_context.h
@@ -8,6 +8,7 @@
 #include <memory>
 #include <vector>
 #include "audio_core/common.h"
+#include "audio_core/delay_line.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
@@ -194,6 +195,8 @@ public:
    [[nodiscard]] bool IsEnabled() const;
    [[nodiscard]] s32 GetMixID() const;
    [[nodiscard]] s32 GetProcessingOrder() const;
+    [[nodiscard]] std::vector<u8>& GetWorkBuffer();
+    [[nodiscard]] const std::vector<u8>& GetWorkBuffer() const;

 protected:
    UsageState usage{UsageState::Invalid};
@@ -201,6 +204,7 @@ protected:
    s32 mix_id{};
    s32 processing_order{};
    bool enabled = false;
+    std::vector<u8> work_buffer{};
 };

 template <typename T>
@@ -212,7 +216,7 @@ public:
        return internal_params;
    }

-    const I3dl2ReverbParams& GetParams() const {
+    const T& GetParams() const {
        return internal_params;
    }

@@ -229,6 +233,27 @@ public:
    void UpdateForCommandGeneration() override;
 };

+struct I3dl2ReverbState {
+    f32 lowpass_0{};
+    f32 lowpass_1{};
+    f32 lowpass_2{};
+
+    DelayLineBase early_delay_line{};
+    std::array<u32, AudioCommon::I3DL2REVERB_TAPS> early_tap_steps{};
+    f32 early_gain{};
+    f32 late_gain{};
+
+    u32 early_to_late_taps{};
+    std::array<DelayLineBase, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> fdn_delay_line{};
+    std::array<DelayLineAllPass, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> decay_delay_line0{};
+    std::array<DelayLineAllPass, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> decay_delay_line1{};
+    f32 last_reverb_echo{};
+    DelayLineBase center_delay_line{};
+    std::array<std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT>, 3> lpf_coefficients{};
+    std::array<f32, AudioCommon::I3DL2REVERB_DELAY_LINE_COUNT> shelf_filter{};
+    f32 dry_gain{};
+};
+
 class EffectI3dl2Reverb : public EffectGeneric<I3dl2ReverbParams> {
 public:
    explicit EffectI3dl2Reverb();
@@ -237,8 +262,12 @@ public:
    void Update(EffectInfo::InParams& in_params) override;
    void UpdateForCommandGeneration() override;

+    I3dl2ReverbState& GetState();
+    const I3dl2ReverbState& GetState() const;
+
 private:
    bool skipped = false;
+    I3dl2ReverbState state{};
 };

 class EffectBiquadFilter : public EffectGeneric<BiquadFilterParams> {
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -168,7 +168,6 @@ add_library(common STATIC
    time_zone.cpp
    time_zone.h
    tree.h
-    uint128.cpp
    uint128.h
    uuid.cpp
    uuid.h
--- a/src/common/uint128.cpp
+++ b/src/common/uint128.cpp
@@ -1,71 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#ifdef _MSC_VER
-#include <intrin.h>
-
-#pragma intrinsic(_umul128)
-#pragma intrinsic(_udiv128)
-#endif
-#include <cstring>
-#include "common/uint128.h"
-
-namespace Common {
-
-#ifdef _MSC_VER
-
-u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
-    u128 r{};
-    r[0] = _umul128(a, b, &r[1]);
-    u64 remainder;
-#if _MSC_VER < 1923
-    return udiv128(r[1], r[0], d, &remainder);
-#else
-    return _udiv128(r[1], r[0], d, &remainder);
-#endif
-}
-
-#else
-
-u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
-    const u64 diva = a / d;
-    const u64 moda = a % d;
-    const u64 divb = b / d;
-    const u64 modb = b % d;
-    return diva * b + moda * divb + moda * modb / d;
-}
-
-#endif
-
-u128 Multiply64Into128(u64 a, u64 b) {
-    u128 result;
-#ifdef _MSC_VER
-    result[0] = _umul128(a, b, &result[1]);
-#else
-    unsigned __int128 tmp = a;
-    tmp *= b;
-    std::memcpy(&result, &tmp, sizeof(u128));
-#endif
-    return result;
-}
-
-std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
-    u64 remainder = dividend[0] % divisor;
-    u64 accum = dividend[0] / divisor;
-    if (dividend[1] == 0)
-        return {accum, remainder};
-    // We ignore dividend[1] / divisor as that overflows
-    const u64 first_segment = (dividend[1] % divisor) << 32;
-    accum += (first_segment / divisor) << 32;
-    const u64 second_segment = (first_segment % divisor) << 32;
-    accum += (second_segment / divisor);
-    remainder += second_segment % divisor;
-    if (remainder >= divisor) {
-        accum++;
-        remainder -= divisor;
-    }
-    return {accum, remainder};
-}
-
-} // namespace Common
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -4,19 +4,98 @@

 #pragma once

+#include <cstring>
 #include <utility>
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#pragma intrinsic(__umulh)
+#pragma intrinsic(_umul128)
+#pragma intrinsic(_udiv128)
+#else
+#include <x86intrin.h>
+#endif
+
 #include "common/common_types.h"

 namespace Common {

 // This function multiplies 2 u64 values and divides it by a u64 value.
-[[nodiscard]] u64 MultiplyAndDivide64(u64 a, u64 b, u64 d);
+[[nodiscard]] static inline u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
+#ifdef _MSC_VER
+    u128 r{};
+    r[0] = _umul128(a, b, &r[1]);
+    u64 remainder;
+#if _MSC_VER < 1923
+    return udiv128(r[1], r[0], d, &remainder);
+#else
+    return _udiv128(r[1], r[0], d, &remainder);
+#endif
+#else
+    const u64 diva = a / d;
+    const u64 moda = a % d;
+    const u64 divb = b / d;
+    const u64 modb = b % d;
+    return diva * b + moda * divb + moda * modb / d;
+#endif
+}

 // This function multiplies 2 u64 values and produces a u128 value;
-[[nodiscard]] u128 Multiply64Into128(u64 a, u64 b);
+[[nodiscard]] static inline u128 Multiply64Into128(u64 a, u64 b) {
+    u128 result;
+#ifdef _MSC_VER
+    result[0] = _umul128(a, b, &result[1]);
+#else
+    unsigned __int128 tmp = a;
+    tmp *= b;
+    std::memcpy(&result, &tmp, sizeof(u128));
+#endif
+    return result;
+}

-// This function divides a u128 by a u32 value and produces two u64 values:
-// the result of division and the remainder
-[[nodiscard]] std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
+[[nodiscard]] static inline u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
+#ifdef __SIZEOF_INT128__
+    const auto base = static_cast<unsigned __int128>(numerator) << 64ULL;
+    return static_cast<u64>(base / divisor);
+#elif defined(_M_X64) || defined(_M_ARM64)
+    std::array<u64, 2> r = {0, numerator};
+    u64 remainder;
+#if _MSC_VER < 1923
+    return udiv128(r[1], r[0], divisor, &remainder);
+#else
+    return _udiv128(r[1], r[0], divisor, &remainder);
+#endif
+#else
+    // This one is a bit more inaccurate.
+    return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
+#endif
+}
+
+[[nodiscard]] static inline u64 MultiplyHigh(u64 a, u64 b) {
+#ifdef __SIZEOF_INT128__
+    return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64;
+#elif defined(_M_X64) || defined(_M_ARM64)
+    return __umulh(a, b); // MSVC
+#else
+    // Generic fallback
+    const u64 a_lo = u32(a);
+    const u64 a_hi = a >> 32;
+    const u64 b_lo = u32(b);
+    const u64 b_hi = b >> 32;
+
+    const u64 a_x_b_hi = a_hi * b_hi;
+    const u64 a_x_b_mid = a_hi * b_lo;
+    const u64 b_x_a_mid = b_hi * a_lo;
+    const u64 a_x_b_lo = a_lo * b_lo;
+
+    const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) +
+                           static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >>
+                          32;
+
+    const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit;
+
+    return multhi;
+#endif
+}

 } // namespace Common
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <cstdint>
+
 #include "common/uint128.h"
 #include "common/wall_clock.h"

@@ -18,7 +20,9 @@ using base_time_point = std::chrono::time_point<base_timer>;
 class StandardWallClock final : public WallClock {
 public:
    explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_)
-        : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false) {
+        : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false),
+          emulated_clock_factor{GetFixedPoint64Factor(emulated_clock_frequency, 1000000000)},
+          emulated_cpu_factor{GetFixedPoint64Factor(emulated_cpu_frequency, 1000000000)} {
        start_time = base_timer::now();
    }

@@ -41,16 +45,11 @@ public:
    }

    u64 GetClockCycles() override {
-        std::chrono::nanoseconds time_now = GetTimeNS();
-        const u128 temporary =
-            Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
-        return Common::Divide128On32(temporary, 1000000000).first;
+        return MultiplyHigh(GetTimeNS().count(), emulated_clock_factor);
    }

    u64 GetCPUCycles() override {
-        std::chrono::nanoseconds time_now = GetTimeNS();
-        const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
-        return Common::Divide128On32(temporary, 1000000000).first;
+        return MultiplyHigh(GetTimeNS().count(), emulated_cpu_factor);
    }

    void Pause([[maybe_unused]] bool is_paused) override {
@@ -59,6 +58,8 @@ public:

 private:
    base_time_point start_time;
+    const u64 emulated_clock_factor;
+    const u64 emulated_cpu_factor;
 };

 #ifdef ARCHITECTURE_x86_64
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -8,68 +8,10 @@
 #include <mutex>
 #include <thread>

-#ifdef _MSC_VER
-#include <intrin.h>
-
-#pragma intrinsic(__umulh)
-#pragma intrinsic(_udiv128)
-#else
-#include <x86intrin.h>
-#endif
-
 #include "common/atomic_ops.h"
 #include "common/uint128.h"
 #include "common/x64/native_clock.h"

-namespace {
-
-[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
-#ifdef __SIZEOF_INT128__
-    const auto base = static_cast<unsigned __int128>(numerator) << 64ULL;
-    return static_cast<u64>(base / divisor);
-#elif defined(_M_X64) || defined(_M_ARM64)
-    std::array<u64, 2> r = {0, numerator};
-    u64 remainder;
-#if _MSC_VER < 1923
-    return udiv128(r[1], r[0], divisor, &remainder);
-#else
-    return _udiv128(r[1], r[0], divisor, &remainder);
-#endif
-#else
-    // This one is bit more inaccurate.
-    return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
-#endif
-}
-
-[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) {
-#ifdef __SIZEOF_INT128__
-    return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64;
-#elif defined(_M_X64) || defined(_M_ARM64)
-    return __umulh(a, b); // MSVC
-#else
-    // Generic fallback
-    const u64 a_lo = u32(a);
-    const u64 a_hi = a >> 32;
-    const u64 b_lo = u32(b);
-    const u64 b_hi = b >> 32;
-
-    const u64 a_x_b_hi = a_hi * b_hi;
-    const u64 a_x_b_mid = a_hi * b_lo;
-    const u64 b_x_a_mid = b_hi * a_lo;
-    const u64 a_x_b_lo = a_lo * b_lo;
-
-    const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) +
-                           static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >>
-                          32;
-
-    const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit;
-
-    return multhi;
-#endif
-}
-
-} // namespace
-
 namespace Common {

 u64 EstimateRDTSCFrequency() {
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -19,7 +19,6 @@ add_library(core STATIC
    core.h
    core_timing.cpp
    core_timing.h
-    core_timing_util.cpp
    core_timing_util.h
    cpu_manager.cpp
    cpu_manager.h
@@ -148,7 +147,7 @@ add_library(core STATIC
    hle/kernel/client_session.h
    hle/kernel/code_set.cpp
    hle/kernel/code_set.h
-    hle/kernel/errors.h
+    hle/kernel/svc_results.h
    hle/kernel/global_scheduler_context.cpp
    hle/kernel/global_scheduler_context.h
    hle/kernel/handle_table.cpp
@@ -174,6 +173,7 @@ add_library(core STATIC
    hle/kernel/k_scheduler.h
    hle/kernel/k_scheduler_lock.h
    hle/kernel/k_scoped_lock.h
+    hle/kernel/k_scoped_resource_reservation.h
    hle/kernel/k_scoped_scheduler_lock_and_sleep.h
    hle/kernel/k_synchronization_object.cpp
    hle/kernel/k_synchronization_object.h
@@ -223,7 +223,6 @@ add_library(core STATIC
    hle/kernel/svc.cpp
    hle/kernel/svc.h
    hle/kernel/svc_common.h
-    hle/kernel/svc_results.h
    hle/kernel/svc_types.h
    hle/kernel/svc_wrap.h
    hle/kernel/time_manager.cpp
@@ -266,6 +265,7 @@ add_library(core STATIC
    hle/service/am/applets/software_keyboard.h
    hle/service/am/applets/web_browser.cpp
    hle/service/am/applets/web_browser.h
+    hle/service/am/applets/web_types.h
    hle/service/am/idle.cpp
    hle/service/am/idle.h
    hle/service/am/omm.cpp
@@ -400,6 +400,7 @@ add_library(core STATIC
    hle/service/hid/controllers/xpad.h
    hle/service/lbl/lbl.cpp
    hle/service/lbl/lbl.h
+    hle/service/ldn/errors.h
    hle/service/ldn/ldn.cpp
    hle/service/ldn/ldn.h
    hle/service/ldr/ldr.cpp
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -1,84 +0,0 @@
-// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
-// Licensed under GPLv2+
-// Refer to the license.txt file included.
-
-#include "core/core_timing_util.h"
-
-#include <cinttypes>
-#include <limits>
-#include "common/logging/log.h"
-#include "common/uint128.h"
-#include "core/hardware_properties.h"
-
-namespace Core::Timing {
-
-constexpr u64 MAX_VALUE_TO_MULTIPLY = std::numeric_limits<s64>::max() / Hardware::BASE_CLOCK_RATE;
-
-s64 msToCycles(std::chrono::milliseconds ms) {
-    if (static_cast<u64>(ms.count() / 1000) > MAX_VALUE_TO_MULTIPLY) {
-        LOG_ERROR(Core_Timing, "Integer overflow, use max value");
-        return std::numeric_limits<s64>::max();
-    }
-    if (static_cast<u64>(ms.count()) > MAX_VALUE_TO_MULTIPLY) {
-        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
-        return Hardware::BASE_CLOCK_RATE * (ms.count() / 1000);
-    }
-    return (Hardware::BASE_CLOCK_RATE * ms.count()) / 1000;
-}
-
-s64 usToCycles(std::chrono::microseconds us) {
-    if (static_cast<u64>(us.count() / 1000000) > MAX_VALUE_TO_MULTIPLY) {
-        LOG_ERROR(Core_Timing, "Integer overflow, use max value");
-        return std::numeric_limits<s64>::max();
-    }
-    if (static_cast<u64>(us.count()) > MAX_VALUE_TO_MULTIPLY) {
-        LOG_DEBUG(Core_Timing, "Time very big, do rounding");
-        return Hardware::BASE_CLOCK_RATE * (us.count() / 1000000);
-    }
-    return (Hardware::BASE_CLOCK_RATE * us.count()) / 1000000;
-}
-
-s64 nsToCycles(std::chrono::nanoseconds ns) {
-    const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::BASE_CLOCK_RATE);
-    return Common::Divide128On32(temporal, static_cast<u32>(1000000000)).first;
-}
-
-u64 msToClockCycles(std::chrono::milliseconds ns) {
-    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
-    return Common::Divide128On32(temp, 1000).first;
-}
-
-u64 usToClockCycles(std::chrono::microseconds ns) {
-    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
-    return Common::Divide128On32(temp, 1000000).first;
-}
-
-u64 nsToClockCycles(std::chrono::nanoseconds ns) {
-    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
-    return Common::Divide128On32(temp, 1000000000).first;
-}
-
-u64 CpuCyclesToClockCycles(u64 ticks) {
-    const u128 temporal = Common::Multiply64Into128(ticks, Hardware::CNTFREQ);
-    return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
-}
-
-std::chrono::milliseconds CyclesToMs(s64 cycles) {
-    const u128 temporal = Common::Multiply64Into128(cycles, 1000);
-    u64 ms = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
-    return std::chrono::milliseconds(ms);
-}
-
-std::chrono::nanoseconds CyclesToNs(s64 cycles) {
-    const u128 temporal = Common::Multiply64Into128(cycles, 1000000000);
-    u64 ns = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
-    return std::chrono::nanoseconds(ns);
-}
-
-std::chrono::microseconds CyclesToUs(s64 cycles) {
-    const u128 temporal = Common::Multiply64Into128(cycles, 1000000);
-    u64 us = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
-    return std::chrono::microseconds(us);
-}
-
-} // namespace Core::Timing
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -1,24 +1,59 @@
-// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
-// Licensed under GPLv2+
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

 #pragma once

 #include <chrono>
+
 #include "common/common_types.h"
+#include "core/hardware_properties.h"

 namespace Core::Timing {

-s64 msToCycles(std::chrono::milliseconds ms);
-s64 usToCycles(std::chrono::microseconds us);
-s64 nsToCycles(std::chrono::nanoseconds ns);
-u64 msToClockCycles(std::chrono::milliseconds ns);
-u64 usToClockCycles(std::chrono::microseconds ns);
-u64 nsToClockCycles(std::chrono::nanoseconds ns);
-std::chrono::milliseconds CyclesToMs(s64 cycles);
-std::chrono::nanoseconds CyclesToNs(s64 cycles);
-std::chrono::microseconds CyclesToUs(s64 cycles);
+namespace detail {
+constexpr u64 CNTFREQ_ADJUSTED = Hardware::CNTFREQ / 1000;
+constexpr u64 BASE_CLOCK_RATE_ADJUSTED = Hardware::BASE_CLOCK_RATE / 1000;
+} // namespace detail

-u64 CpuCyclesToClockCycles(u64 ticks);
+[[nodiscard]] constexpr s64 msToCycles(std::chrono::milliseconds ms) {
+    return ms.count() * detail::BASE_CLOCK_RATE_ADJUSTED;
+}
+
+[[nodiscard]] constexpr s64 usToCycles(std::chrono::microseconds us) {
+    return us.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000;
+}
+
+[[nodiscard]] constexpr s64 nsToCycles(std::chrono::nanoseconds ns) {
+    return ns.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000000;
+}
+
+[[nodiscard]] constexpr u64 msToClockCycles(std::chrono::milliseconds ms) {
+    return static_cast<u64>(ms.count()) * detail::CNTFREQ_ADJUSTED;
+}
+
+[[nodiscard]] constexpr u64 usToClockCycles(std::chrono::microseconds us) {
+    return us.count() * detail::CNTFREQ_ADJUSTED / 1000;
+}
+
+[[nodiscard]] constexpr u64 nsToClockCycles(std::chrono::nanoseconds ns) {
+    return ns.count() * detail::CNTFREQ_ADJUSTED / 1000000;
+}
+
+[[nodiscard]] constexpr u64 CpuCyclesToClockCycles(u64 ticks) {
+    return ticks * detail::CNTFREQ_ADJUSTED / detail::BASE_CLOCK_RATE_ADJUSTED;
+}
+
+[[nodiscard]] constexpr std::chrono::milliseconds CyclesToMs(s64 cycles) {
+    return std::chrono::milliseconds(cycles / detail::BASE_CLOCK_RATE_ADJUSTED);
+}
+
+[[nodiscard]] constexpr std::chrono::nanoseconds CyclesToNs(s64 cycles) {
+    return std::chrono::nanoseconds(cycles * 1000000 / detail::BASE_CLOCK_RATE_ADJUSTED);
+}
+
+[[nodiscard]] constexpr std::chrono::microseconds CyclesToUs(s64 cycles) {
+    return std::chrono::microseconds(cycles * 1000 / detail::BASE_CLOCK_RATE_ADJUSTED);
+}

 } // namespace Core::Timing
--- a/src/core/frontend/applets/controller.h
+++ b/src/core/frontend/applets/controller.h
@@ -31,6 +31,7 @@ struct ControllerParameters {
    bool allow_dual_joycons{};
    bool allow_left_joycon{};
    bool allow_right_joycon{};
+    bool allow_gamecube_controller{};
 };

 class ControllerApplet {
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -4,11 +4,11 @@

 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/server_port.h"
 #include "core/hle/kernel/session.h"
+#include "core/hle/kernel/svc_results.h"

 namespace Kernel {

@@ -21,7 +21,7 @@ std::shared_ptr<ServerPort> ClientPort::GetServerPort() const {

 ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() {
    if (active_sessions >= max_sessions) {
-        return ERR_MAX_CONNECTIONS_REACHED;
+        return ResultMaxConnectionsReached;
    }
    active_sessions++;

--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -3,11 +3,11 @@
 // Refer to the license.txt file included.

 #include "core/hle/kernel/client_session.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/session.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/result.h"

 namespace Kernel {
@@ -43,7 +43,7 @@ ResultCode ClientSession::SendSyncRequest(std::shared_ptr<KThread> thread,
                                          Core::Timing::CoreTiming& core_timing) {
    // Keep ServerSession alive until we're done working with it.
    if (!parent->Server()) {
-        return ERR_SESSION_CLOSED_BY_REMOTE;
+        return ResultSessionClosedByRemote;
    }

    // Signal the server session that new data is available
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -1,43 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "core/hle/result.h"
-
-namespace Kernel {
-
-// Confirmed Switch kernel error codes
-
-constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
-constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
-constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59};
-constexpr ResultCode ERR_TERMINATION_REQUESTED{ErrorModule::Kernel, 59};
-constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
-constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
-constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103};
-constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
-constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
-constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
-constexpr ResultCode ERR_INVALID_CURRENT_MEMORY{ErrorModule::Kernel, 106};
-constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
-constexpr ResultCode ERR_INVALID_MEMORY_RANGE{ErrorModule::Kernel, 110};
-constexpr ResultCode ERR_INVALID_PROCESSOR_ID{ErrorModule::Kernel, 113};
-constexpr ResultCode ERR_INVALID_THREAD_PRIORITY{ErrorModule::Kernel, 112};
-constexpr ResultCode ERR_INVALID_HANDLE{ErrorModule::Kernel, 114};
-constexpr ResultCode ERR_INVALID_POINTER{ErrorModule::Kernel, 115};
-constexpr ResultCode ERR_INVALID_COMBINATION{ErrorModule::Kernel, 116};
-constexpr ResultCode RESULT_TIMEOUT{ErrorModule::Kernel, 117};
-constexpr ResultCode ERR_SYNCHRONIZATION_CANCELED{ErrorModule::Kernel, 118};
-constexpr ResultCode ERR_CANCELLED{ErrorModule::Kernel, 118};
-constexpr ResultCode ERR_OUT_OF_RANGE{ErrorModule::Kernel, 119};
-constexpr ResultCode ERR_INVALID_ENUM_VALUE{ErrorModule::Kernel, 120};
-constexpr ResultCode ERR_NOT_FOUND{ErrorModule::Kernel, 121};
-constexpr ResultCode ERR_BUSY{ErrorModule::Kernel, 122};
-constexpr ResultCode ERR_SESSION_CLOSED_BY_REMOTE{ErrorModule::Kernel, 123};
-constexpr ResultCode ERR_INVALID_STATE{ErrorModule::Kernel, 125};
-constexpr ResultCode ERR_RESERVED_VALUE{ErrorModule::Kernel, 126};
-constexpr ResultCode ERR_RESOURCE_LIMIT_EXCEEDED{ErrorModule::Kernel, 132};
-
-} // namespace Kernel
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -6,12 +6,12 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
+#include "core/hle/kernel/svc_results.h"

 namespace Kernel {
 namespace {
@@ -33,7 +33,7 @@ HandleTable::~HandleTable() = default;
 ResultCode HandleTable::SetSize(s32 handle_table_size) {
    if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
        LOG_ERROR(Kernel, "Handle table size {} is greater than {}", handle_table_size, MAX_COUNT);
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
    }

    // Values less than or equal to zero indicate to use the maximum allowable
@@ -53,7 +53,7 @@ ResultVal<Handle> HandleTable::Create(std::shared_ptr<Object> obj) {
    const u16 slot = next_free_slot;
    if (slot >= table_size) {
        LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
-        return ERR_HANDLE_TABLE_FULL;
+        return ResultHandleTableFull;
    }
    next_free_slot = generations[slot];

@@ -76,7 +76,7 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
    std::shared_ptr<Object> object = GetGeneric(handle);
    if (object == nullptr) {
        LOG_ERROR(Kernel, "Tried to duplicate invalid handle: {:08X}", handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
    }
    return Create(std::move(object));
 }
@@ -84,7 +84,7 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
 ResultCode HandleTable::Close(Handle handle) {
    if (!IsValid(handle)) {
        LOG_ERROR(Kernel, "Handle is not valid! handle={:08X}", handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
    }

    const u16 slot = GetSlot(handle);
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -14,7 +14,6 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/k_readable_event.h"
@@ -26,6 +25,7 @@
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/server_session.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/memory.h"

--- a/src/core/hle/kernel/k_address_arbiter.cpp
+++ b/src/core/hle/kernel/k_address_arbiter.cpp
@@ -120,10 +120,10 @@ ResultCode KAddressArbiter::SignalAndIncrementIfEqual(VAddr addr, s32 value, s32
        s32 user_value{};
        if (!UpdateIfEqual(system, &user_value, addr, value, value + 1)) {
            LOG_ERROR(Kernel, "Invalid current memory!");
-            return Svc::ResultInvalidCurrentMemory;
+            return ResultInvalidCurrentMemory;
        }
        if (user_value != value) {
-            return Svc::ResultInvalidState;
+            return ResultInvalidState;
        }

        auto it = thread_tree.nfind_light({addr, -1});
@@ -189,10 +189,10 @@ ResultCode KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32

        if (!succeeded) {
            LOG_ERROR(Kernel, "Invalid current memory!");
-            return Svc::ResultInvalidCurrentMemory;
+            return ResultInvalidCurrentMemory;
        }
        if (user_value != value) {
-            return Svc::ResultInvalidState;
+            return ResultInvalidState;
        }

        while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
@@ -221,11 +221,11 @@ ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement
        // Check that the thread isn't terminating.
        if (cur_thread->IsTerminationRequested()) {
            slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
        }

        // Set the synced object.
-        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+        cur_thread->SetSyncedObject(nullptr, ResultTimedOut);

        // Read the value from userspace.
        s32 user_value{};
@@ -238,19 +238,19 @@ ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement

        if (!succeeded) {
            slp.CancelSleep();
-            return Svc::ResultInvalidCurrentMemory;
+            return ResultInvalidCurrentMemory;
        }

        // Check that the value is less than the specified one.
        if (user_value >= value) {
            slp.CancelSleep();
-            return Svc::ResultInvalidState;
+            return ResultInvalidState;
        }

        // Check that the timeout is non-zero.
        if (timeout == 0) {
            slp.CancelSleep();
-            return Svc::ResultTimedOut;
+            return ResultTimedOut;
        }

        // Set the arbiter.
@@ -288,29 +288,29 @@ ResultCode KAddressArbiter::WaitIfEqual(VAddr addr, s32 value, s64 timeout) {
        // Check that the thread isn't terminating.
        if (cur_thread->IsTerminationRequested()) {
            slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
        }

        // Set the synced object.
-        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+        cur_thread->SetSyncedObject(nullptr, ResultTimedOut);

        // Read the value from userspace.
        s32 user_value{};
        if (!ReadFromUser(system, &user_value, addr)) {
            slp.CancelSleep();
-            return Svc::ResultInvalidCurrentMemory;
+            return ResultInvalidCurrentMemory;
        }

        // Check that the value is equal.
        if (value != user_value) {
            slp.CancelSleep();
-            return Svc::ResultInvalidState;
+            return ResultInvalidState;
        }

        // Check that the timeout is non-zero.
        if (timeout == 0) {
            slp.CancelSleep();
-            return Svc::ResultTimedOut;
+            return ResultTimedOut;
        }

        // Set the arbiter.
--- a/src/core/hle/kernel/k_condition_variable.cpp
+++ b/src/core/hle/kernel/k_condition_variable.cpp
@@ -92,10 +92,10 @@ ResultCode KConditionVariable::SignalToAddress(VAddr addr) {
        // Write the value to userspace.
        if (!WriteToUser(system, addr, std::addressof(next_value))) {
            if (next_owner_thread) {
-                next_owner_thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory);
+                next_owner_thread->SetSyncedObject(nullptr, ResultInvalidCurrentMemory);
            }

-            return Svc::ResultInvalidCurrentMemory;
+            return ResultInvalidCurrentMemory;
        }
    }

@@ -114,20 +114,20 @@ ResultCode KConditionVariable::WaitForAddress(Handle handle, VAddr addr, u32 val
            cur_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);

            // Check if the thread should terminate.
-            R_UNLESS(!cur_thread->IsTerminationRequested(), Svc::ResultTerminationRequested);
+            R_UNLESS(!cur_thread->IsTerminationRequested(), ResultTerminationRequested);

            {
                // Read the tag from userspace.
                u32 test_tag{};
                R_UNLESS(ReadFromUser(system, std::addressof(test_tag), addr),
-                         Svc::ResultInvalidCurrentMemory);
+                         ResultInvalidCurrentMemory);

                // If the tag isn't the handle (with wait mask), we're done.
                R_UNLESS(test_tag == (handle | Svc::HandleWaitMask), RESULT_SUCCESS);

                // Get the lock owner thread.
                owner_thread = kernel.CurrentProcess()->GetHandleTable().Get<KThread>(handle);
-                R_UNLESS(owner_thread, Svc::ResultInvalidHandle);
+                R_UNLESS(owner_thread, ResultInvalidHandle);

                // Update the lock.
                cur_thread->SetAddressKey(addr, value);
@@ -191,13 +191,13 @@ KThread* KConditionVariable::SignalImpl(KThread* thread) {
                thread_to_close = owner_thread.get();
            } else {
                // The lock was tagged with a thread that doesn't exist.
-                thread->SetSyncedObject(nullptr, Svc::ResultInvalidState);
+                thread->SetSyncedObject(nullptr, ResultInvalidState);
                thread->Wakeup();
            }
        }
    } else {
        // If the address wasn't accessible, note so.
-        thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory);
+        thread->SetSyncedObject(nullptr, ResultInvalidCurrentMemory);
        thread->Wakeup();
    }

@@ -263,12 +263,12 @@ ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout)
        KScopedSchedulerLockAndSleep slp{kernel, cur_thread, timeout};

        // Set the synced object.
-        cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+        cur_thread->SetSyncedObject(nullptr, ResultTimedOut);

        // Check that the thread isn't terminating.
        if (cur_thread->IsTerminationRequested()) {
            slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
        }

        // Update the value and process for the next owner.
@@ -302,7 +302,7 @@ ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout)
            // Write the value to userspace.
            if (!WriteToUser(system, addr, std::addressof(next_value))) {
                slp.CancelSleep();
-                return Svc::ResultInvalidCurrentMemory;
+                return ResultInvalidCurrentMemory;
            }
        }

--- a/src/core/hle/kernel/k_readable_event.cpp
+++ b/src/core/hle/kernel/k_readable_event.cpp
@@ -6,7 +6,6 @@
 #include "common/assert.h"
 #include "common/common_funcs.h"
 #include "common/logging/log.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/k_readable_event.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_thread.h"
@@ -47,7 +46,7 @@ ResultCode KReadableEvent::Reset() {
    KScopedSchedulerLock lk{kernel};

    if (!is_signaled) {
-        return Svc::ResultInvalidState;
+        return ResultInvalidState;
    }

    is_signaled = false;
--- a/src/core/hle/kernel/k_resource_limit.cpp
+++ b/src/core/hle/kernel/k_resource_limit.cpp
@@ -75,7 +75,7 @@ s64 KResourceLimit::GetFreeValue(LimitableResource which) const {
 ResultCode KResourceLimit::SetLimitValue(LimitableResource which, s64 value) {
    const auto index = static_cast<std::size_t>(which);
    KScopedLightLock lk(lock);
-    R_UNLESS(current_values[index] <= value, Svc::ResultInvalidState);
+    R_UNLESS(current_values[index] <= value, ResultInvalidState);

    limit_values[index] = value;

--- a/src/core/hle/kernel/k_scoped_resource_reservation.h
+++ b/src/core/hle/kernel/k_scoped_resource_reservation.h
@@ -0,0 +1,67 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// This file references various implementation details from Atmosphere, an open-source firmware for
+// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "core/hle/kernel/k_resource_limit.h"
+#include "core/hle/kernel/process.h"
+
+namespace Kernel {
+
+class KScopedResourceReservation {
+public:
+    explicit KScopedResourceReservation(std::shared_ptr<KResourceLimit> l, LimitableResource r,
+                                        s64 v, s64 timeout)
+        : resource_limit(std::move(l)), value(v), resource(r) {
+        if (resource_limit && value) {
+            success = resource_limit->Reserve(resource, value, timeout);
+        } else {
+            success = true;
+        }
+    }
+
+    explicit KScopedResourceReservation(std::shared_ptr<KResourceLimit> l, LimitableResource r,
+                                        s64 v = 1)
+        : resource_limit(std::move(l)), value(v), resource(r) {
+        if (resource_limit && value) {
+            success = resource_limit->Reserve(resource, value);
+        } else {
+            success = true;
+        }
+    }
+
+    explicit KScopedResourceReservation(const Process* p, LimitableResource r, s64 v, s64 t)
+        : KScopedResourceReservation(p->GetResourceLimit(), r, v, t) {}
+
+    explicit KScopedResourceReservation(const Process* p, LimitableResource r, s64 v = 1)
+        : KScopedResourceReservation(p->GetResourceLimit(), r, v) {}
+
+    ~KScopedResourceReservation() noexcept {
+        if (resource_limit && value && success) {
+            // Resource was not committed; release the reservation.
+            resource_limit->Release(resource, value);
+        }
+    }
+
+    /// Commit the resource reservation; after this, destroying the object does not release it.
+    void Commit() {
+        resource_limit = nullptr;
+    }
+
+    [[nodiscard]] bool Succeeded() const {
+        return success;
+    }
+
+private:
+    std::shared_ptr<KResourceLimit> resource_limit;
+    s64 value;
+    LimitableResource resource;
+    bool success;
+};
+
+} // namespace Kernel
--- a/src/core/hle/kernel/k_synchronization_object.cpp
+++ b/src/core/hle/kernel/k_synchronization_object.cpp
@@ -40,20 +40,20 @@ ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
        // Check if the timeout is zero.
        if (timeout == 0) {
            slp.CancelSleep();
-            return Svc::ResultTimedOut;
+            return ResultTimedOut;
        }

        // Check if the thread should terminate.
        if (thread->IsTerminationRequested()) {
            slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
        }

        // Check if waiting was canceled.
        if (thread->IsWaitCancelled()) {
            slp.CancelSleep();
            thread->ClearWaitCancelled();
-            return Svc::ResultCancelled;
+            return ResultCancelled;
        }

        // Add the waiters.
@@ -75,7 +75,7 @@ ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,

        // Mark the thread as waiting.
        thread->SetCancellable();
-        thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+        thread->SetSyncedObject(nullptr, ResultTimedOut);
        thread->SetState(ThreadState::Waiting);
        thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Synchronization);
    }
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -18,7 +18,6 @@
 #include "core/core.h"
 #include "core/cpu_manager.h"
 #include "core/hardware_properties.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_condition_variable.h"
 #include "core/hle/kernel/k_resource_limit.h"
@@ -127,7 +126,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s

    // Set core ID and wait result.
    core_id = phys_core;
-    wait_result = Svc::ResultNoSynchronizationObject;
+    wait_result = ResultNoSynchronizationObject;

    // Set priorities.
    priority = prio;
@@ -238,7 +237,7 @@ void KThread::Finalize() {
        while (it != waiter_list.end()) {
            // The thread shouldn't be a kernel waiter.
            it->SetLockOwner(nullptr);
-            it->SetSyncedObject(nullptr, Svc::ResultInvalidState);
+            it->SetSyncedObject(nullptr, ResultInvalidState);
            it->Wakeup();
            it = waiter_list.erase(it);
        }
@@ -447,7 +446,7 @@ ResultCode KThread::SetCoreMask(s32 core_id, u64 v_affinity_mask) {
        // If the core id is no-update magic, preserve the ideal core id.
        if (core_id == Svc::IdealCoreNoUpdate) {
            core_id = virtual_ideal_core_id;
-            R_UNLESS(((1ULL << core_id) & v_affinity_mask) != 0, Svc::ResultInvalidCombination);
+            R_UNLESS(((1ULL << core_id) & v_affinity_mask) != 0, ResultInvalidCombination);
        }

        // Set the virtual core/affinity mask.
@@ -526,7 +525,7 @@ ResultCode KThread::SetCoreMask(s32 core_id, u64 v_affinity_mask) {
                if (GetStackParameters().is_pinned) {
                    // Verify that the current thread isn't terminating.
                    R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
-                             Svc::ResultTerminationRequested);
+                             ResultTerminationRequested);

                    // Note that the thread was pinned.
                    thread_is_pinned = true;
@@ -604,7 +603,7 @@ void KThread::WaitCancel() {
            sleeping_queue->WakeupThread(this);
            wait_cancelled = true;
        } else {
-            SetSyncedObject(nullptr, Svc::ResultCancelled);
+            SetSyncedObject(nullptr, ResultCancelled);
            SetState(ThreadState::Runnable);
            wait_cancelled = false;
        }
@@ -663,12 +662,12 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
        // Verify our state.
        const auto cur_state = GetState();
        R_UNLESS((cur_state == ThreadState::Waiting || cur_state == ThreadState::Runnable),
-                 Svc::ResultInvalidState);
+                 ResultInvalidState);

        // Either pause or resume.
        if (activity == Svc::ThreadActivity::Paused) {
            // Verify that we're not suspended.
-            R_UNLESS(!IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+            R_UNLESS(!IsSuspendRequested(SuspendType::Thread), ResultInvalidState);

            // Suspend.
            RequestSuspend(SuspendType::Thread);
@@ -676,7 +675,7 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
            ASSERT(activity == Svc::ThreadActivity::Runnable);

            // Verify that we're suspended.
-            R_UNLESS(IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+            R_UNLESS(IsSuspendRequested(SuspendType::Thread), ResultInvalidState);

            // Resume.
            Resume(SuspendType::Thread);
@@ -698,7 +697,7 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
            if (GetStackParameters().is_pinned) {
                // Verify that the current thread isn't terminating.
                R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
-                         Svc::ResultTerminationRequested);
+                         ResultTerminationRequested);

                // Note that the thread was pinned and not current.
                thread_is_pinned = true;
@@ -745,7 +744,7 @@ ResultCode KThread::GetThreadContext3(std::vector<u8>& out) {
        KScopedSchedulerLock sl{kernel};

        // Verify that we're suspended.
-        R_UNLESS(IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+        R_UNLESS(IsSuspendRequested(SuspendType::Thread), ResultInvalidState);

        // If we're not terminating, get the thread's user context.
        if (!IsTerminationRequested()) {
@@ -905,12 +904,11 @@ ResultCode KThread::Run() {
        KScopedSchedulerLock lk{kernel};

        // If either this thread or the current thread are requesting termination, note it.
-        R_UNLESS(!IsTerminationRequested(), Svc::ResultTerminationRequested);
-        R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
-                 Svc::ResultTerminationRequested);
+        R_UNLESS(!IsTerminationRequested(), ResultTerminationRequested);
+        R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(), ResultTerminationRequested);

        // Ensure our thread state is correct.
-        R_UNLESS(GetState() == ThreadState::Initialized, Svc::ResultInvalidState);
+        R_UNLESS(GetState() == ThreadState::Initialized, ResultInvalidState);

        // If the current thread has been asked to suspend, suspend it and retry.
        if (GetCurrentThread(kernel).IsSuspended()) {
@@ -962,7 +960,7 @@ ResultCode KThread::Sleep(s64 timeout) {
        // Check if the thread should terminate.
        if (IsTerminationRequested()) {
            slp.CancelSleep();
-            return Svc::ResultTerminationRequested;
+            return ResultTerminationRequested;
        }

        // Mark the thread as waiting.
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -26,7 +26,6 @@
 #include "core/device_memory.h"
 #include "core/hardware_properties.h"
 #include "core/hle/kernel/client_port.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_resource_limit.h"
 #include "core/hle/kernel/k_scheduler.h"
@@ -39,6 +38,7 @@
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/service_thread.h"
 #include "core/hle/kernel/shared_memory.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/hle/lock.h"
 #include "core/hle/result.h"
@@ -141,11 +141,17 @@ struct KernelCore::Impl {
        ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Events, 700).IsSuccess());
        ASSERT(system_resource_limit->SetLimitValue(LimitableResource::TransferMemory, 200)
                   .IsSuccess());
-        ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Sessions, 900).IsSuccess());
+        ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Sessions, 933).IsSuccess());

-        if (!system_resource_limit->Reserve(LimitableResource::PhysicalMemory, 0x60000)) {
+        // Derived from recent software updates. The kernel reserves 27MB
+        constexpr u64 kernel_size{0x1b00000};
+        if (!system_resource_limit->Reserve(LimitableResource::PhysicalMemory, kernel_size)) {
            UNREACHABLE();
        }
+        // Reserve secure applet memory, introduced in firmware 5.0.0
+        constexpr u64 secure_applet_memory_size{0x400000};
+        ASSERT(system_resource_limit->Reserve(LimitableResource::PhysicalMemory,
+                                              secure_applet_memory_size));
    }

    void InitializePreemption(KernelCore& kernel) {
@@ -302,8 +308,11 @@ struct KernelCore::Impl {
        // Allocate slab heaps
        user_slab_heap_pages = std::make_unique<Memory::SlabHeap<Memory::Page>>();

+        constexpr u64 user_slab_heap_size{0x1ef000};
+        // Reserve slab heaps
+        ASSERT(
+            system_resource_limit->Reserve(LimitableResource::PhysicalMemory, user_slab_heap_size));
        // Initialize slab heaps
-        constexpr u64 user_slab_heap_size{0x3de000};
        user_slab_heap_pages->Initialize(
            system.DeviceMemory().GetPointer(Core::DramMemoryMap::SlabHeapBase),
            user_slab_heap_size);
--- a/src/core/hle/kernel/memory/memory_manager.cpp
+++ b/src/core/hle/kernel/memory/memory_manager.cpp
@@ -8,9 +8,9 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/scope_exit.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/memory/memory_manager.h"
 #include "core/hle/kernel/memory/page_linked_list.h"
+#include "core/hle/kernel/svc_results.h"

 namespace Kernel::Memory {

@@ -95,7 +95,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
    // Choose a heap based on our page size request
    const s32 heap_index{PageHeap::GetBlockIndex(num_pages)};
    if (heap_index < 0) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
    }

    // TODO (bunnei): Support multiple managers
@@ -140,7 +140,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa

    // Only succeed if we allocated as many pages as we wanted
    if (num_pages) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
    }

    // We succeeded!
--- a/src/core/hle/kernel/memory/page_table.cpp
+++ b/src/core/hle/kernel/memory/page_table.cpp
@@ -6,8 +6,7 @@
 #include "common/assert.h"
 #include "common/scope_exit.h"
 #include "core/core.h"
-#include "core/hle/kernel/errors.h"
-#include "core/hle/kernel/k_resource_limit.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/address_space_info.h"
 #include "core/hle/kernel/memory/memory_block.h"
@@ -16,6 +15,7 @@
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/memory/system_control.h"
 #include "core/hle/kernel/process.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/memory.h"

 namespace Kernel::Memory {
@@ -141,7 +141,7 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t
        (alias_region_size + heap_region_size + stack_region_size + kernel_map_region_size)};
    if (alloc_size < needed_size) {
        UNREACHABLE();
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
    }

    const std::size_t remaining_size{alloc_size - needed_size};
@@ -277,11 +277,11 @@ ResultCode PageTable::MapProcessCode(VAddr addr, std::size_t num_pages, MemorySt
    const u64 size{num_pages * PageSize};

    if (!CanContain(addr, size, state)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    if (IsRegionMapped(addr, size)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    PageLinkedList page_linked_list;
@@ -307,7 +307,7 @@ ResultCode PageTable::MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std::
                                  MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped));

    if (IsRegionMapped(dst_addr, size)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    PageLinkedList page_linked_list;
@@ -409,27 +409,25 @@ ResultCode PageTable::MapPhysicalMemory(VAddr addr, std::size_t size) {
        return RESULT_SUCCESS;
    }

-    auto process{system.Kernel().CurrentProcess()};
    const std::size_t remaining_size{size - mapped_size};
    const std::size_t remaining_pages{remaining_size / PageSize};

-    if (process->GetResourceLimit() &&
-        !process->GetResourceLimit()->Reserve(LimitableResource::PhysicalMemory, remaining_size)) {
-        return ERR_RESOURCE_LIMIT_EXCEEDED;
+    // Reserve the memory from the process resource limit.
+    KScopedResourceReservation memory_reservation(
+        system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
+        remaining_size);
+    if (!memory_reservation.Succeeded()) {
+        LOG_ERROR(Kernel, "Could not reserve remaining {:X} bytes", remaining_size);
+        return ResultResourceLimitedExceeded;
    }

    PageLinkedList page_linked_list;
-    {
-        auto block_guard = detail::ScopeExit([&] {
-            system.Kernel().MemoryManager().Free(page_linked_list, remaining_pages, memory_pool);
-            process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, remaining_size);
-        });

-        CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages,
-                                                              memory_pool));
+    CASCADE_CODE(
+        system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages, memory_pool));

-        block_guard.Cancel();
-    }
+    // We succeeded, so commit the memory reservation.
+    memory_reservation.Commit();

    MapPhysicalMemory(page_linked_list, addr, end_addr);

@@ -454,12 +452,12 @@ ResultCode PageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) {
    block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) {
        if (info.state == MemoryState::Normal) {
            if (info.attribute != MemoryAttribute::None) {
-                result = ERR_INVALID_ADDRESS_STATE;
+                result = ResultInvalidCurrentMemory;
                return;
            }
            mapped_size += GetSizeInRange(info, addr, end_addr);
        } else if (info.state != MemoryState::Free) {
-            result = ERR_INVALID_ADDRESS_STATE;
+            result = ResultInvalidCurrentMemory;
        }
    });

@@ -526,7 +524,7 @@ ResultCode PageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) {
        MemoryAttribute::Mask, MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped));

    if (IsRegionMapped(dst_addr, size)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    PageLinkedList page_linked_list;
@@ -577,7 +575,7 @@ ResultCode PageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) {
    AddRegionToPages(dst_addr, num_pages, dst_pages);

    if (!dst_pages.IsEqual(src_pages)) {
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    {
@@ -626,11 +624,11 @@ ResultCode PageTable::MapPages(VAddr addr, PageLinkedList& page_linked_list, Mem
    const std::size_t size{num_pages * PageSize};

    if (!CanContain(addr, size, state)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    if (IsRegionMapped(addr, num_pages * PageSize)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    CASCADE_CODE(MapPages(addr, page_linked_list, perm));
@@ -768,7 +766,7 @@ ResultCode PageTable::SetHeapCapacity(std::size_t new_heap_capacity) {
 ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {

    if (size > heap_region_end - heap_region_start) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
    }

    const u64 previous_heap_size{GetHeapSize()};
@@ -781,10 +779,14 @@ ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {

        const u64 delta{size - previous_heap_size};

-        auto process{system.Kernel().CurrentProcess()};
-        if (process->GetResourceLimit() && delta != 0 &&
-            !process->GetResourceLimit()->Reserve(LimitableResource::PhysicalMemory, delta)) {
-            return ERR_RESOURCE_LIMIT_EXCEEDED;
+        // Reserve memory for the heap extension.
+        KScopedResourceReservation memory_reservation(
+            system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
+            delta);
+
+        if (!memory_reservation.Succeeded()) {
+            LOG_ERROR(Kernel, "Could not reserve heap extension of size {:X} bytes", delta);
+            return ResultResourceLimitedExceeded;
        }

        PageLinkedList page_linked_list;
@@ -794,12 +796,15 @@ ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) {
            system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool));

        if (IsRegionMapped(current_heap_addr, delta)) {
-            return ERR_INVALID_ADDRESS_STATE;
+            return ResultInvalidCurrentMemory;
        }

        CASCADE_CODE(
            Operate(current_heap_addr, num_pages, page_linked_list, OperationType::MapGroup));

+        // Succeeded in allocation, commit the resource reservation
+        memory_reservation.Commit();
+
        block_manager->Update(current_heap_addr, num_pages, MemoryState::Normal,
                              MemoryPermission::ReadAndWrite);

@@ -816,17 +821,17 @@ ResultVal<VAddr> PageTable::AllocateAndMapMemory(std::size_t needed_num_pages, s
    std::lock_guard lock{page_table_lock};

    if (!CanContain(region_start, region_num_pages * PageSize, state)) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    if (region_num_pages <= needed_num_pages) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
    }

    const VAddr addr{
        AllocateVirtualMemory(region_start, region_num_pages, needed_num_pages, align)};
    if (!addr) {
-        return ERR_OUT_OF_MEMORY;
+        return ResultOutOfMemory;
    }

    if (is_map_only) {
@@ -1105,13 +1110,13 @@ constexpr ResultCode PageTable::CheckMemoryState(const MemoryInfo& info, MemoryS
                                                 MemoryAttribute attr) const {
    // Validate the states match expectation
    if ((info.state & state_mask) != state) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }
    if ((info.perm & perm_mask) != perm) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }
    if ((info.attribute & attr_mask) != attr) {
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    return RESULT_SUCCESS;
@@ -1138,14 +1143,14 @@ ResultCode PageTable::CheckMemoryState(MemoryState* out_state, MemoryPermission*
    while (true) {
        // Validate the current block
        if (!(info.state == first_state)) {
-            return ERR_INVALID_ADDRESS_STATE;
+            return ResultInvalidCurrentMemory;
        }
        if (!(info.perm == first_perm)) {
-            return ERR_INVALID_ADDRESS_STATE;
+            return ResultInvalidCurrentMemory;
        }
        if (!((info.attribute | static_cast<MemoryAttribute>(ignore_attr)) ==
              (first_attr | static_cast<MemoryAttribute>(ignore_attr)))) {
-            return ERR_INVALID_ADDRESS_STATE;
+            return ResultInvalidCurrentMemory;
        }

        // Validate against the provided masks
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -14,9 +14,9 @@
 #include "core/device_memory.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/code_set.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/k_resource_limit.h"
 #include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/memory_block_manager.h"
@@ -39,6 +39,7 @@ namespace {
 */
 void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) {
    const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart();
+    ASSERT(owner_process.GetResourceLimit()->Reserve(LimitableResource::Threads, 1));
    auto thread_res = KThread::Create(system, ThreadType::User, "main", entry_point, priority, 0,
                                      owner_process.GetIdealCoreId(), stack_top, &owner_process);

@@ -117,6 +118,9 @@ std::shared_ptr<Process> Process::Create(Core::System& system, std::string name,

    std::shared_ptr<Process> process = std::make_shared<Process>(system);
    process->name = std::move(name);
+
+    // TODO: This is inaccurate
+    // The process should hold a reference to the kernel-wide resource limit.
    process->resource_limit = std::make_shared<KResourceLimit>(kernel, system);
    process->status = ProcessStatus::Created;
    process->program_id = 0;
@@ -155,6 +159,9 @@ void Process::DecrementThreadCount() {
 }

 u64 Process::GetTotalPhysicalMemoryAvailable() const {
+    // TODO: This is expected to always return the application memory pool size after accurately
+    // reserving kernel resources. The current workaround uses a process-local resource limit of
+    // application memory pool size, which is inaccurate.
    const u64 capacity{resource_limit->GetFreeValue(LimitableResource::PhysicalMemory) +
                       page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
                       main_thread_stack_size};
@@ -248,8 +255,8 @@ ResultCode Process::Reset() {
    KScopedSchedulerLock sl{kernel};

    // Validate that we're in a state that we can reset.
-    R_UNLESS(status != ProcessStatus::Exited, Svc::ResultInvalidState);
-    R_UNLESS(is_signaled, Svc::ResultInvalidState);
+    R_UNLESS(status != ProcessStatus::Exited, ResultInvalidState);
+    R_UNLESS(is_signaled, ResultInvalidState);

    // Clear signaled.
    is_signaled = false;
@@ -264,6 +271,17 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata,
    system_resource_size = metadata.GetSystemResourceSize();
    image_size = code_size;

+    // Set initial resource limits
+    resource_limit->SetLimitValue(
+        LimitableResource::PhysicalMemory,
+        kernel.MemoryManager().GetSize(Memory::MemoryManager::Pool::Application));
+    KScopedResourceReservation memory_reservation(resource_limit, LimitableResource::PhysicalMemory,
+                                                  code_size + system_resource_size);
+    if (!memory_reservation.Succeeded()) {
+        LOG_ERROR(Kernel, "Could not reserve process memory requirements of size {:X} bytes",
+                  code_size + system_resource_size);
+        return ResultResourceLimitedExceeded;
+    }
    // Initialize proces address space
    if (const ResultCode result{
            page_table->InitializeForProcess(metadata.GetAddressSpaceType(), false, 0x8000000,
@@ -305,24 +323,22 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata,
        UNREACHABLE();
    }

-    // Set initial resource limits
-    resource_limit->SetLimitValue(
-        LimitableResource::PhysicalMemory,
-        kernel.MemoryManager().GetSize(Memory::MemoryManager::Pool::Application));
    resource_limit->SetLimitValue(LimitableResource::Threads, 608);
    resource_limit->SetLimitValue(LimitableResource::Events, 700);
    resource_limit->SetLimitValue(LimitableResource::TransferMemory, 128);
    resource_limit->SetLimitValue(LimitableResource::Sessions, 894);
-    ASSERT(resource_limit->Reserve(LimitableResource::PhysicalMemory, code_size));

    // Create TLS region
    tls_region_address = CreateTLSRegion();
+    memory_reservation.Commit();

    return handle_table.SetSize(capabilities.GetHandleTableSize());
 }

 void Process::Run(s32 main_thread_priority, u64 stack_size) {
    AllocateMainThreadStack(stack_size);
+    resource_limit->Reserve(LimitableResource::Threads, 1);
+    resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size);

    const std::size_t heap_capacity{memory_usage_capacity - main_thread_stack_size - image_size};
    ASSERT(!page_table->SetHeapCapacity(heap_capacity).IsError());
@@ -330,8 +346,6 @@ void Process::Run(s32 main_thread_priority, u64 stack_size) {
    ChangeStatus(ProcessStatus::Running);

    SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top);
-    resource_limit->Reserve(LimitableResource::Threads, 1);
-    resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size);
 }

 void Process::PrepareForTermination() {
@@ -358,6 +372,11 @@ void Process::PrepareForTermination() {
    FreeTLSRegion(tls_region_address);
    tls_region_address = 0;

+    if (resource_limit) {
+        resource_limit->Release(LimitableResource::PhysicalMemory,
+                                main_thread_stack_size + image_size);
+    }
+
    ChangeStatus(ProcessStatus::Exited);
 }

--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -6,10 +6,10 @@

 #include "common/bit_util.h"
 #include "common/logging/log.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/process_capability.h"
+#include "core/hle/kernel/svc_results.h"

 namespace Kernel {
 namespace {
@@ -123,13 +123,13 @@ ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities,
            // If there's only one, then there's a problem.
            if (i >= num_capabilities) {
                LOG_ERROR(Kernel, "Invalid combination! i={}", i);
-                return ERR_INVALID_COMBINATION;
+                return ResultInvalidCombination;
            }

            const auto size_flags = capabilities[i];
            if (GetCapabilityType(size_flags) != CapabilityType::MapPhysical) {
                LOG_ERROR(Kernel, "Invalid capability type! size_flags={}", size_flags);
-                return ERR_INVALID_COMBINATION;
+                return ResultInvalidCombination;
            }

            const auto result = HandleMapPhysicalFlags(descriptor, size_flags, page_table);
@@ -159,7 +159,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
    const auto type = GetCapabilityType(flag);

    if (type == CapabilityType::Unset) {
-        return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+        return ResultInvalidCapabilityDescriptor;
    }

    // Bail early on ignorable entries, as one would expect,
@@ -176,7 +176,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
        LOG_ERROR(Kernel,
                  "Attempted to initialize flags that may only be initialized once. set_flags={}",
                  set_flags);
-        return ERR_INVALID_COMBINATION;
+        return ResultInvalidCombination;
    }
    set_flags |= set_flag;

@@ -202,7 +202,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s
    }

    LOG_ERROR(Kernel, "Invalid capability type! type={}", type);
-    return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+    return ResultInvalidCapabilityDescriptor;
 }

 void ProcessCapabilities::Clear() {
@@ -225,7 +225,7 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
    if (priority_mask != 0 || core_mask != 0) {
        LOG_ERROR(Kernel, "Core or priority mask are not zero! priority_mask={}, core_mask={}",
                  priority_mask, core_mask);
-        return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+        return ResultInvalidCapabilityDescriptor;
    }

    const u32 core_num_min = (flags >> 16) & 0xFF;
@@ -233,7 +233,7 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
    if (core_num_min > core_num_max) {
        LOG_ERROR(Kernel, "Core min is greater than core max! core_num_min={}, core_num_max={}",
                  core_num_min, core_num_max);
-        return ERR_INVALID_COMBINATION;
+        return ResultInvalidCombination;
    }

    const u32 priority_min = (flags >> 10) & 0x3F;
@@ -242,13 +242,13 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) {
        LOG_ERROR(Kernel,
                  "Priority min is greater than priority max! priority_min={}, priority_max={}",
                  core_num_min, priority_max);
-        return ERR_INVALID_COMBINATION;
+        return ResultInvalidCombination;
    }

    // The switch only has 4 usable cores.
    if (core_num_max >= 4) {
        LOG_ERROR(Kernel, "Invalid max cores specified! core_num_max={}", core_num_max);
-        return ERR_INVALID_PROCESSOR_ID;
+        return ResultInvalidCoreId;
    }

    const auto make_mask = [](u64 min, u64 max) {
@@ -269,7 +269,7 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags)

    // If we've already set this svc before, bail.
    if ((set_svc_bits & svc_bit) != 0) {
-        return ERR_INVALID_COMBINATION;
+        return ResultInvalidCombination;
    }
    set_svc_bits |= svc_bit;

@@ -283,7 +283,7 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags)

        if (svc_number >= svc_capabilities.size()) {
            LOG_ERROR(Kernel, "Process svc capability is out of range! svc_number={}", svc_number);
-            return ERR_OUT_OF_RANGE;
+            return ResultOutOfRange;
        }

        svc_capabilities[svc_number] = true;
@@ -321,7 +321,7 @@ ResultCode ProcessCapabilities::HandleInterruptFlags(u32 flags) {
        if (interrupt >= interrupt_capabilities.size()) {
            LOG_ERROR(Kernel, "Process interrupt capability is out of range! svc_number={}",
                      interrupt);
-            return ERR_OUT_OF_RANGE;
+            return ResultOutOfRange;
        }

        interrupt_capabilities[interrupt] = true;
@@ -334,7 +334,7 @@ ResultCode ProcessCapabilities::HandleProgramTypeFlags(u32 flags) {
    const u32 reserved = flags >> 17;
    if (reserved != 0) {
        LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
-        return ERR_RESERVED_VALUE;
+        return ResultReservedValue;
    }

    program_type = static_cast<ProgramType>((flags >> 14) & 0b111);
@@ -354,7 +354,7 @@ ResultCode ProcessCapabilities::HandleKernelVersionFlags(u32 flags) {
        LOG_ERROR(Kernel,
                  "Kernel version is non zero or flags are too small! major_version={}, flags={}",
                  major_version, flags);
-        return ERR_INVALID_CAPABILITY_DESCRIPTOR;
+        return ResultInvalidCapabilityDescriptor;
    }

    kernel_version = flags;
@@ -365,7 +365,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
    const u32 reserved = flags >> 26;
    if (reserved != 0) {
        LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
-        return ERR_RESERVED_VALUE;
+        return ResultReservedValue;
    }

    handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
@@ -376,7 +376,7 @@ ResultCode ProcessCapabilities::HandleDebugFlags(u32 flags) {
    const u32 reserved = flags >> 19;
    if (reserved != 0) {
        LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved);
-        return ERR_RESERVED_VALUE;
+        return ResultReservedValue;
    }

    is_debuggable = (flags & 0x20000) != 0;
--- a/src/core/hle/kernel/server_port.cpp
+++ b/src/core/hle/kernel/server_port.cpp
@@ -5,11 +5,11 @@
 #include <tuple>
 #include "common/assert.h"
 #include "core/hle/kernel/client_port.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/server_port.h"
 #include "core/hle/kernel/server_session.h"
+#include "core/hle/kernel/svc_results.h"

 namespace Kernel {

@@ -18,7 +18,7 @@ ServerPort::~ServerPort() = default;

 ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() {
    if (pending_sessions.empty()) {
-        return ERR_NOT_FOUND;
+        return ResultNotFound;
    }

    auto session = std::move(pending_sessions.back());
--- a/src/core/hle/kernel/session.cpp
+++ b/src/core/hle/kernel/session.cpp
@@ -4,15 +4,23 @@

 #include "common/assert.h"
 #include "core/hle/kernel/client_session.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
 #include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/session.h"

 namespace Kernel {

 Session::Session(KernelCore& kernel) : KSynchronizationObject{kernel} {}
-Session::~Session() = default;
+Session::~Session() {
+    // Release the session resource that was reserved when the Session pair was created.
+    kernel.GetSystemResourceLimit()->Release(LimitableResource::Sessions, 1);
+}

 Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
+    // Reserve a new session from the resource limit.
+    KScopedResourceReservation session_reservation(kernel.GetSystemResourceLimit(),
+                                                   LimitableResource::Sessions);
+    ASSERT(session_reservation.Succeeded());
    auto session{std::make_shared<Session>(kernel)};
    auto client_session{Kernel::ClientSession::Create(kernel, session, name + "_Client").Unwrap()};
    auto server_session{Kernel::ServerSession::Create(kernel, session, name + "_Server").Unwrap()};
@@ -21,6 +29,7 @@ Session::SessionPair Session::Create(KernelCore& kernel, std::string name) {
    session->client = client_session;
    session->server = server_session;

+    session_reservation.Commit();
    return std::make_pair(std::move(client_session), std::move(server_session));
 }

--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -4,6 +4,7 @@

 #include "common/assert.h"
 #include "core/core.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/shared_memory.h"
@@ -13,7 +14,9 @@ namespace Kernel {
 SharedMemory::SharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory)
    : Object{kernel}, device_memory{device_memory} {}

-SharedMemory::~SharedMemory() = default;
+SharedMemory::~SharedMemory() {
+    kernel.GetSystemResourceLimit()->Release(LimitableResource::PhysicalMemory, size);
+}

 std::shared_ptr<SharedMemory> SharedMemory::Create(
    KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process,
@@ -21,6 +24,11 @@ std::shared_ptr<SharedMemory> SharedMemory::Create(
    Memory::MemoryPermission user_permission, PAddr physical_address, std::size_t size,
    std::string name) {

+    const auto resource_limit = kernel.GetSystemResourceLimit();
+    KScopedResourceReservation memory_reservation(resource_limit, LimitableResource::PhysicalMemory,
+                                                  size);
+    ASSERT(memory_reservation.Succeeded());
+
    std::shared_ptr<SharedMemory> shared_memory{
        std::make_shared<SharedMemory>(kernel, device_memory)};

@@ -32,6 +40,7 @@ std::shared_ptr<SharedMemory> SharedMemory::Create(
    shared_memory->size = size;
    shared_memory->name = name;

+    memory_reservation.Commit();
    return shared_memory;
 }

--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -23,7 +23,6 @@
 #include "core/cpu_manager.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_address_arbiter.h"
 #include "core/hle/kernel/k_condition_variable.h"
@@ -31,6 +30,7 @@
 #include "core/hle/kernel/k_readable_event.h"
 #include "core/hle/kernel/k_resource_limit.h"
 #include "core/hle/kernel/k_scheduler.h"
+#include "core/hle/kernel/k_scoped_resource_reservation.h"
 #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
 #include "core/hle/kernel/k_synchronization_object.h"
 #include "core/hle/kernel/k_thread.h"
@@ -71,49 +71,49 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
                                      VAddr src_addr, u64 size) {
    if (!Common::Is4KBAligned(dst_addr)) {
        LOG_ERROR(Kernel_SVC, "Destination address is not aligned to 4KB, 0x{:016X}", dst_addr);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
    }

    if (!Common::Is4KBAligned(src_addr)) {
        LOG_ERROR(Kernel_SVC, "Source address is not aligned to 4KB, 0x{:016X}", src_addr);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidAddress;
    }

    if (size == 0) {
        LOG_ERROR(Kernel_SVC, "Size is 0");
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
    }

    if (!Common::Is4KBAligned(size)) {
        LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:016X}", size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
    }

    if (!IsValidAddressRange(dst_addr, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Destination is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
                  dst_addr, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    if (!IsValidAddressRange(src_addr, size)) {
        LOG_ERROR(Kernel_SVC, "Source is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
                  src_addr, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    if (!manager.IsInsideAddressSpace(src_addr, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}",
                  src_addr, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    if (manager.IsOutsideStackRegion(dst_addr, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Destination is not within the stack region, addr=0x{:016X}, size=0x{:016X}",
                  dst_addr, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    if (manager.IsInsideHeapRegion(dst_addr, size)) {
@@ -121,7 +121,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
                  "Destination does not fit within the heap region, addr=0x{:016X}, "
                  "size=0x{:016X}",
                  dst_addr, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    if (manager.IsInsideAliasRegion(dst_addr, size)) {
@@ -129,7 +129,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
                  "Destination does not fit within the map region, addr=0x{:016X}, "
                  "size=0x{:016X}",
                  dst_addr, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    return RESULT_SUCCESS;
@@ -138,6 +138,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds
 enum class ResourceLimitValueType {
    CurrentValue,
    LimitValue,
+    PeakValue,
 };

 ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit,
@@ -146,7 +147,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_
    const auto type = static_cast<LimitableResource>(resource_type);
    if (!IsValidResourceType(type)) {
        LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
-        return ERR_INVALID_ENUM_VALUE;
+        return ResultInvalidEnumValue;
    }

    const auto* const current_process = system.Kernel().CurrentProcess();
@@ -157,14 +158,20 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_
    if (!resource_limit_object) {
        LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}",
                  resource_limit);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
    }

-    if (value_type == ResourceLimitValueType::CurrentValue) {
+    switch (value_type) {
+    case ResourceLimitValueType::CurrentValue:
        return MakeResult(resource_limit_object->GetCurrentValue(type));
+    case ResourceLimitValueType::LimitValue:
+        return MakeResult(resource_limit_object->GetLimitValue(type));
+    case ResourceLimitValueType::PeakValue:
+        return MakeResult(resource_limit_object->GetPeakValue(type));
+    default:
+        LOG_ERROR(Kernel_SVC, "Invalid resource value_type: '{}'", value_type);
+        return ResultInvalidEnumValue;
    }
-
-    return MakeResult(resource_limit_object->GetLimitValue(type));
 }
 } // Anonymous namespace

@@ -177,12 +184,12 @@ static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_s
    if ((heap_size % 0x200000) != 0) {
        LOG_ERROR(Kernel_SVC, "The heap size is not a multiple of 2MB, heap_size=0x{:016X}",
                  heap_size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
    }

    if (heap_size >= 0x200000000) {
        LOG_ERROR(Kernel_SVC, "The heap size is not less than 8GB, heap_size=0x{:016X}", heap_size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
    }

    auto& page_table{system.Kernel().CurrentProcess()->PageTable()};
@@ -208,19 +215,19 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si

    if (!Common::Is4KBAligned(address)) {
        LOG_ERROR(Kernel_SVC, "Address not page aligned (0x{:016X})", address);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
    }

    if (size == 0 || !Common::Is4KBAligned(size)) {
        LOG_ERROR(Kernel_SVC, "Invalid size (0x{:X}). Size must be non-zero and page aligned.",
                  size);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidSize;
    }

    if (!IsValidAddressRange(address, size)) {
        LOG_ERROR(Kernel_SVC, "Address range overflowed (Address: 0x{:016X}, Size: 0x{:016X})",
                  address, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    const auto attributes{static_cast<Memory::MemoryAttribute>(mask | attribute)};
@@ -229,7 +236,7 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si
        LOG_ERROR(Kernel_SVC,
                  "Memory attribute doesn't match the given mask (Attribute: 0x{:X}, Mask: {:X}",
                  attribute, mask);
-        return ERR_INVALID_COMBINATION;
+        return ResultInvalidCombination;
    }

    auto& page_table{system.Kernel().CurrentProcess()->PageTable()};
@@ -293,7 +300,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
        LOG_ERROR(Kernel_SVC,
                  "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}",
                  port_name_address);
-        return ERR_NOT_FOUND;
+        return ResultNotFound;
    }

    static constexpr std::size_t PortNameMaxLength = 11;
@@ -302,7 +309,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
    if (port_name.size() > PortNameMaxLength) {
        LOG_ERROR(Kernel_SVC, "Port name is too long, expected {} but got {}", PortNameMaxLength,
                  port_name.size());
-        return ERR_OUT_OF_RANGE;
+        return ResultOutOfRange;
    }

    LOG_TRACE(Kernel_SVC, "called port_name={}", port_name);
@@ -311,11 +318,9 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
    const auto it = kernel.FindNamedPort(port_name);
    if (!kernel.IsValidNamedPort(it)) {
        LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name);
-        return ERR_NOT_FOUND;
+        return ResultNotFound;
    }

-    ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(LimitableResource::Sessions, 1));
-
    auto client_port = it->second;

    std::shared_ptr<ClientSession> client_session;
@@ -340,7 +345,7 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
    std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle);
    if (!session) {
        LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
    }

    LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());
@@ -405,7 +410,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
        const Process* const owner_process = thread->GetOwnerProcess();
        if (!owner_process) {
            LOG_ERROR(Kernel_SVC, "Non-existent owning process encountered.");
-            return ERR_INVALID_HANDLE;
+            return ResultInvalidHandle;
        }

        *process_id = owner_process->GetProcessID();
@@ -415,7 +420,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
    // NOTE: This should also handle debug objects before returning.

    LOG_ERROR(Kernel_SVC, "Handle does not exist, handle=0x{:08X}", handle);
-    return ERR_INVALID_HANDLE;
+    return ResultInvalidHandle;
 }

 static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* process_id_high,
@@ -438,7 +443,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha
        LOG_ERROR(Kernel_SVC,
                  "Handle address is not a valid virtual address, handle_address=0x{:016X}",
                  handles_address);
-        return ERR_INVALID_POINTER;
+        return ResultInvalidPointer;
    }

    static constexpr u64 MaxHandles = 0x40;
@@ -446,7 +451,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha
    if (handle_count > MaxHandles) {
        LOG_ERROR(Kernel_SVC, "Handle count specified is too large, expected {} but got {}",
                  MaxHandles, handle_count);
-        return ERR_OUT_OF_RANGE;
+        return ResultOutOfRange;
    }

    auto& kernel = system.Kernel();
@@ -459,7 +464,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha

        if (object == nullptr) {
            LOG_ERROR(Kernel_SVC, "Object is a nullptr");
-            return ERR_INVALID_HANDLE;
+            return ResultInvalidHandle;
        }

        objects[i] = object.get();
@@ -481,6 +486,7 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand
    // Get the thread from its handle.
    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
    std::shared_ptr<KThread> thread = handle_table.Get<KThread>(thread_handle);
+
    if (!thread) {
        LOG_ERROR(Kernel_SVC, "Invalid thread handle provided (handle={:08X})", thread_handle);
        return ResultInvalidHandle;
@@ -525,6 +531,7 @@ static ResultCode ArbitrateUnlock(Core::System& system, VAddr address) {
    LOG_TRACE(Kernel_SVC, "called address=0x{:X}", address);

    // Validate the input address.
+
    if (Memory::IsKernelAddress(address)) {
        LOG_ERROR(Kernel_SVC,
                  "Attempting to arbitrate an unlock on a kernel address (address={:08X})",
@@ -735,7 +742,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
        if (info_sub_id != 0) {
            LOG_ERROR(Kernel_SVC, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id,
                      info_sub_id);
-            return ERR_INVALID_ENUM_VALUE;
+            return ResultInvalidEnumValue;
        }

        const auto& current_process_handle_table =
@@ -744,7 +751,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
        if (!process) {
            LOG_ERROR(Kernel_SVC, "Process is not valid! info_id={}, info_sub_id={}, handle={:08X}",
                      info_id, info_sub_id, handle);
-            return ERR_INVALID_HANDLE;
+            return ResultInvalidHandle;
        }

        switch (info_id_type) {
@@ -826,7 +833,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
        }

        LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id);
-        return ERR_INVALID_ENUM_VALUE;
+        return ResultInvalidEnumValue;
    }

    case GetInfoType::IsCurrentProcessBeingDebugged:
@@ -836,13 +843,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
    case GetInfoType::RegisterResourceLimit: {
        if (handle != 0) {
            LOG_ERROR(Kernel, "Handle is non zero! handle={:08X}", handle);
-            return ERR_INVALID_HANDLE;
+            return ResultInvalidHandle;
        }

        if (info_sub_id != 0) {
            LOG_ERROR(Kernel, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id,
                      info_sub_id);
-            return ERR_INVALID_COMBINATION;
+            return ResultInvalidCombination;
        }

        Process* const current_process = system.Kernel().CurrentProcess();
@@ -867,13 +874,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
        if (handle != 0) {
            LOG_ERROR(Kernel_SVC, "Process Handle is non zero, expected 0 result but got {:016X}",
                      handle);
-            return ERR_INVALID_HANDLE;
+            return ResultInvalidHandle;
        }

        if (info_sub_id >= Process::RANDOM_ENTROPY_SIZE) {
            LOG_ERROR(Kernel_SVC, "Entropy size is out of range, expected {} but got {}",
                      Process::RANDOM_ENTROPY_SIZE, info_sub_id);
-            return ERR_INVALID_COMBINATION;
+            return ResultInvalidCombination;
        }

        *result = system.Kernel().CurrentProcess()->GetRandomEntropy(info_sub_id);
@@ -890,7 +897,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
        if (info_sub_id != 0xFFFFFFFFFFFFFFFF && info_sub_id >= num_cpus) {
            LOG_ERROR(Kernel_SVC, "Core count is out of range, expected {} but got {}", num_cpus,
                      info_sub_id);
-            return ERR_INVALID_COMBINATION;
+            return ResultInvalidCombination;
        }

        const auto thread = system.Kernel().CurrentProcess()->GetHandleTable().Get<KThread>(
@@ -898,7 +905,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
        if (!thread) {
            LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}",
                      static_cast<Handle>(handle));
-            return ERR_INVALID_HANDLE;
+            return ResultInvalidHandle;
        }

        const auto& core_timing = system.CoreTiming();
@@ -922,7 +929,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha

    default:
        LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id);
-        return ERR_INVALID_ENUM_VALUE;
+        return ResultInvalidEnumValue;
    }
 }

@@ -945,22 +952,22 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)

    if (!Common::Is4KBAligned(addr)) {
        LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
    }

    if (!Common::Is4KBAligned(size)) {
        LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
    }

    if (size == 0) {
        LOG_ERROR(Kernel_SVC, "Size is zero");
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
    }

    if (!(addr < addr + size)) {
        LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    Process* const current_process{system.Kernel().CurrentProcess()};
@@ -968,21 +975,21 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)

    if (current_process->GetSystemResourceSize() == 0) {
        LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
-        return ERR_INVALID_STATE;
+        return ResultInvalidState;
    }

    if (!page_table.IsInsideAddressSpace(addr, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
                  size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    if (page_table.IsOutsideAliasRegion(addr, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr,
                  size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    return page_table.MapPhysicalMemory(addr, size);
@@ -999,22 +1006,22 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size

    if (!Common::Is4KBAligned(addr)) {
        LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
    }

    if (!Common::Is4KBAligned(size)) {
        LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
    }

    if (size == 0) {
        LOG_ERROR(Kernel_SVC, "Size is zero");
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
    }

    if (!(addr < addr + size)) {
        LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    Process* const current_process{system.Kernel().CurrentProcess()};
@@ -1022,21 +1029,21 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size

    if (current_process->GetSystemResourceSize() == 0) {
        LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
-        return ERR_INVALID_STATE;
+        return ResultInvalidState;
    }

    if (!page_table.IsInsideAddressSpace(addr, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr,
                  size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    if (page_table.IsOutsideAliasRegion(addr, size)) {
        LOG_ERROR(Kernel_SVC,
                  "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr,
                  size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    return page_table.UnmapPhysicalMemory(addr, size);
@@ -1206,23 +1213,23 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han

    if (!Common::Is4KBAligned(addr)) {
        LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, addr=0x{:016X}", addr);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
    }

    if (size == 0) {
        LOG_ERROR(Kernel_SVC, "Size is 0");
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
    }

    if (!Common::Is4KBAligned(size)) {
        LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, size=0x{:016X}", size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
    }

    if (!IsValidAddressRange(addr, size)) {
        LOG_ERROR(Kernel_SVC, "Region is not a valid address range, addr=0x{:016X}, size=0x{:016X}",
                  addr, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    const auto permission_type = static_cast<Memory::MemoryPermission>(permissions);
@@ -1230,7 +1237,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
        Memory::MemoryPermission::ReadAndWrite) {
        LOG_ERROR(Kernel_SVC, "Expected Read or ReadWrite permission but got permissions=0x{:08X}",
                  permissions);
-        return ERR_INVALID_MEMORY_PERMISSIONS;
+        return ResultInvalidMemoryPermissions;
    }

    auto* const current_process{system.Kernel().CurrentProcess()};
@@ -1241,7 +1248,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
                  "Addr does not fit within the valid region, addr=0x{:016X}, "
                  "size=0x{:016X}",
                  addr, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    if (page_table.IsInsideHeapRegion(addr, size)) {
@@ -1249,7 +1256,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
                  "Addr does not fit within the heap region, addr=0x{:016X}, "
                  "size=0x{:016X}",
                  addr, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    if (page_table.IsInsideAliasRegion(addr, size)) {
@@ -1257,14 +1264,14 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
                  "Address does not fit within the map region, addr=0x{:016X}, "
                  "size=0x{:016X}",
                  addr, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    auto shared_memory{current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle)};
    if (!shared_memory) {
        LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}",
                  shared_memory_handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
    }

    return shared_memory->Map(*current_process, addr, size, permission_type);
@@ -1285,7 +1292,7 @@ static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_add
    if (!process) {
        LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
                  process_handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
    }

    auto& memory{system.Memory()};
@@ -1332,18 +1339,18 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
    if (!Common::Is4KBAligned(src_address)) {
        LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).",
                  src_address);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
    }

    if (!Common::Is4KBAligned(dst_address)) {
        LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).",
                  dst_address);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
    }

    if (size == 0 || !Common::Is4KBAligned(size)) {
        LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
    }

    if (!IsValidAddressRange(dst_address, size)) {
@@ -1351,7 +1358,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
                  "Destination address range overflows the address space (dst_address=0x{:016X}, "
                  "size=0x{:016X}).",
                  dst_address, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    if (!IsValidAddressRange(src_address, size)) {
@@ -1359,7 +1366,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
                  "Source address range overflows the address space (src_address=0x{:016X}, "
                  "size=0x{:016X}).",
                  src_address, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
@@ -1367,7 +1374,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
    if (!process) {
        LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).",
                  process_handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
    }

    auto& page_table = process->PageTable();
@@ -1376,7 +1383,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
                  "Source address range is not within the address space (src_address=0x{:016X}, "
                  "size=0x{:016X}).",
                  src_address, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    if (!page_table.IsInsideASLRRegion(dst_address, size)) {
@@ -1384,7 +1391,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand
                  "Destination address range is not within the ASLR region (dst_address=0x{:016X}, "
                  "size=0x{:016X}).",
                  dst_address, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    return page_table.MapProcessCodeMemory(dst_address, src_address, size);
@@ -1400,18 +1407,18 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
    if (!Common::Is4KBAligned(dst_address)) {
        LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).",
                  dst_address);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
    }

    if (!Common::Is4KBAligned(src_address)) {
        LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).",
                  src_address);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
    }

-    if (size == 0 || Common::Is4KBAligned(size)) {
+    if (size == 0 || !Common::Is4KBAligned(size)) {
        LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size);
-        return ERR_INVALID_SIZE;
+        return ResultInvalidSize;
    }

    if (!IsValidAddressRange(dst_address, size)) {
@@ -1419,7 +1426,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
                  "Destination address range overflows the address space (dst_address=0x{:016X}, "
                  "size=0x{:016X}).",
                  dst_address, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    if (!IsValidAddressRange(src_address, size)) {
@@ -1427,7 +1434,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
                  "Source address range overflows the address space (src_address=0x{:016X}, "
                  "size=0x{:016X}).",
                  src_address, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
@@ -1435,7 +1442,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
    if (!process) {
        LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).",
                  process_handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
    }

    auto& page_table = process->PageTable();
@@ -1444,7 +1451,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
                  "Source address range is not within the address space (src_address=0x{:016X}, "
                  "size=0x{:016X}).",
                  src_address, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    if (!page_table.IsInsideASLRRegion(dst_address, size)) {
@@ -1452,7 +1459,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
                  "Destination address range is not within the ASLR region (dst_address=0x{:016X}, "
                  "size=0x{:016X}).",
                  dst_address, size);
-        return ERR_INVALID_MEMORY_RANGE;
+        return ResultInvalidMemoryRange;
    }

    return page_table.UnmapProcessCodeMemory(dst_address, src_address, size);
@@ -1515,8 +1522,13 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
        return ResultInvalidPriority;
    }

-    ASSERT(process.GetResourceLimit()->Reserve(
-        LimitableResource::Threads, 1, system.CoreTiming().GetGlobalTimeNs().count() + 100000000));
+    KScopedResourceReservation thread_reservation(
+        kernel.CurrentProcess(), LimitableResource::Threads, 1,
+        system.CoreTiming().GetGlobalTimeNs().count() + 100000000);
+    if (!thread_reservation.Succeeded()) {
+        LOG_ERROR(Kernel_SVC, "Could not reserve a new thread");
+        return ResultResourceLimitedExceeded;
+    }

    std::shared_ptr<KThread> thread;
    {
@@ -1536,6 +1548,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
    // Set the thread name for debugging purposes.
    thread->SetName(
        fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle));
+    thread_reservation.Commit();

    return RESULT_SUCCESS;
 }
@@ -1844,7 +1857,7 @@ static ResultCode ResetSignal(Core::System& system, Handle handle) {

    LOG_ERROR(Kernel_SVC, "invalid handle (0x{:08X})", handle);

-    return Svc::ResultInvalidHandle;
+    return ResultInvalidHandle;
 }

 static ResultCode ResetSignal32(Core::System& system, Handle handle) {
@@ -1860,18 +1873,18 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd

    if (!Common::Is4KBAligned(addr)) {
        LOG_ERROR(Kernel_SVC, "Address ({:016X}) is not page aligned!", addr);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
    }

    if (!Common::Is4KBAligned(size) || size == 0) {
        LOG_ERROR(Kernel_SVC, "Size ({:016X}) is not page aligned or equal to zero!", size);
-        return ERR_INVALID_ADDRESS;
+        return ResultInvalidAddress;
    }

    if (!IsValidAddressRange(addr, size)) {
        LOG_ERROR(Kernel_SVC, "Address and size cause overflow! (address={:016X}, size={:016X})",
                  addr, size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    const auto perms{static_cast<Memory::MemoryPermission>(permissions)};
@@ -1879,10 +1892,17 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
        perms == Memory::MemoryPermission::Write) {
        LOG_ERROR(Kernel_SVC, "Invalid memory permissions for transfer memory! (perms={:08X})",
                  permissions);
-        return ERR_INVALID_MEMORY_PERMISSIONS;
+        return ResultInvalidMemoryPermissions;
    }

    auto& kernel = system.Kernel();
+    // Reserve a new transfer memory from the process resource limit.
+    KScopedResourceReservation trmem_reservation(kernel.CurrentProcess(),
+                                                 LimitableResource::TransferMemory);
+    if (!trmem_reservation.Succeeded()) {
+        LOG_ERROR(Kernel_SVC, "Could not reserve a new transfer memory");
+        return ResultResourceLimitedExceeded;
+    }
    auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, perms);

    if (const auto reserve_result{transfer_mem_handle->Reserve()}; reserve_result.IsError()) {
@@ -1894,6 +1914,7 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
    if (result.Failed()) {
        return result.Code();
    }
+    trmem_reservation.Commit();

    *handle = *result;
    return RESULT_SUCCESS;
@@ -1989,7 +2010,6 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
        LOG_ERROR(Kernel_SVC, "Unable to successfully set core mask (result={})", set_result.raw);
        return set_result;
    }
-
    return RESULT_SUCCESS;
 }

@@ -2002,8 +2022,17 @@ static ResultCode SetThreadCoreMask32(Core::System& system, Handle thread_handle
 static ResultCode SignalEvent(Core::System& system, Handle event_handle) {
    LOG_DEBUG(Kernel_SVC, "called, event_handle=0x{:08X}", event_handle);

+    auto& kernel = system.Kernel();
    // Get the current handle table.
-    const HandleTable& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+    const HandleTable& handle_table = kernel.CurrentProcess()->GetHandleTable();
+
+    // Reserve an event slot. NOTE(review): nothing is created here - is this reservation needed?
+    KScopedResourceReservation event_reservation(kernel.CurrentProcess(),
+                                                 LimitableResource::Events);
+    if (!event_reservation.Succeeded()) {
+        LOG_ERROR(Kernel, "Could not reserve a new event");
+        return ResultResourceLimitedExceeded;
+    }

    // Get the writable event.
    auto writable_event = handle_table.Get<KWritableEvent>(event_handle);
@@ -2012,6 +2041,9 @@ static ResultCode SignalEvent(Core::System& system, Handle event_handle) {
        return ResultInvalidHandle;
    }

+    // Commit the successful reservation.
+    event_reservation.Commit();
+
    return writable_event->Signal();
 }

@@ -2043,7 +2075,7 @@ static ResultCode ClearEvent(Core::System& system, Handle event_handle) {

    LOG_ERROR(Kernel_SVC, "Event handle does not exist, event_handle=0x{:08X}", event_handle);

-    return Svc::ResultInvalidHandle;
+    return ResultInvalidHandle;
 }

 static ResultCode ClearEvent32(Core::System& system, Handle event_handle) {
@@ -2106,13 +2138,13 @@ static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_
    if (!process) {
        LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}",
                  process_handle);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
    }

    const auto info_type = static_cast<InfoType>(type);
    if (info_type != InfoType::Status) {
        LOG_ERROR(Kernel_SVC, "Expected info_type to be Status but got {} instead", type);
-        return ERR_INVALID_ENUM_VALUE;
+        return ResultInvalidEnumValue;
    }

    *out = static_cast<u64>(process->GetStatus());
@@ -2174,7 +2206,7 @@ static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resour
    const auto type = static_cast<LimitableResource>(resource_type);
    if (!IsValidResourceType(type)) {
        LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
-        return ERR_INVALID_ENUM_VALUE;
+        return ResultInvalidEnumValue;
    }

    auto* const current_process = system.Kernel().CurrentProcess();
@@ -2185,16 +2217,16 @@ static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resour
    if (!resource_limit_object) {
        LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}",
                  resource_limit);
-        return ERR_INVALID_HANDLE;
+        return ResultInvalidHandle;
    }

    const auto set_result = resource_limit_object->SetLimitValue(type, static_cast<s64>(value));
    if (set_result.IsError()) {
-        LOG_ERROR(
-            Kernel_SVC,
-            "Attempted to lower resource limit ({}) for category '{}' below its current value ({})",
-            resource_limit_object->GetLimitValue(type), resource_type,
-            resource_limit_object->GetCurrentValue(type));
+        LOG_ERROR(Kernel_SVC,
+                  "Attempted to lower resource limit ({}) for category '{}' below its current "
+                  "value ({})",
+                  resource_limit_object->GetLimitValue(type), resource_type,
+                  resource_limit_object->GetCurrentValue(type));
        return set_result;
    }

@@ -2211,7 +2243,7 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
        LOG_ERROR(Kernel_SVC,
                  "Supplied size outside [0, 0x0FFFFFFF] range. out_process_ids_size={}",
                  out_process_ids_size);
-        return ERR_OUT_OF_RANGE;
+        return ResultOutOfRange;
    }

    const auto& kernel = system.Kernel();
@@ -2221,7 +2253,7 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes,
                                        out_process_ids, total_copy_size)) {
        LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
                  out_process_ids, out_process_ids + total_copy_size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    auto& memory = system.Memory();
@@ -2250,7 +2282,7 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd
    if ((out_thread_ids_size & 0xF0000000) != 0) {
        LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. size={}",
                  out_thread_ids_size);
-        return ERR_OUT_OF_RANGE;
+        return ResultOutOfRange;
    }

    const auto* const current_process = system.Kernel().CurrentProcess();
@@ -2260,7 +2292,7 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd
        !current_process->PageTable().IsInsideAddressSpace(out_thread_ids, total_copy_size)) {
        LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}",
                  out_thread_ids, out_thread_ids + total_copy_size);
-        return ERR_INVALID_ADDRESS_STATE;
+        return ResultInvalidCurrentMemory;
    }

    auto& memory = system.Memory();
--- a/src/core/hle/kernel/svc_results.h
+++ b/src/core/hle/kernel/svc_results.h
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2018 yuzu emulator team
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

@@ -6,21 +6,36 @@

 #include "core/hle/result.h"

-namespace Kernel::Svc {
+namespace Kernel {

+// Confirmed Switch kernel error codes
+
+constexpr ResultCode ResultMaxConnectionsReached{ErrorModule::Kernel, 7};
+constexpr ResultCode ResultInvalidCapabilityDescriptor{ErrorModule::Kernel, 14};
 constexpr ResultCode ResultNoSynchronizationObject{ErrorModule::Kernel, 57};
 constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59};
+constexpr ResultCode ResultInvalidSize{ErrorModule::Kernel, 101};
 constexpr ResultCode ResultInvalidAddress{ErrorModule::Kernel, 102};
 constexpr ResultCode ResultOutOfResource{ErrorModule::Kernel, 103};
+constexpr ResultCode ResultOutOfMemory{ErrorModule::Kernel, 104};
+constexpr ResultCode ResultHandleTableFull{ErrorModule::Kernel, 105};
 constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106};
+constexpr ResultCode ResultInvalidMemoryPermissions{ErrorModule::Kernel, 108};
+constexpr ResultCode ResultInvalidMemoryRange{ErrorModule::Kernel, 110};
 constexpr ResultCode ResultInvalidPriority{ErrorModule::Kernel, 112};
 constexpr ResultCode ResultInvalidCoreId{ErrorModule::Kernel, 113};
 constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114};
+constexpr ResultCode ResultInvalidPointer{ErrorModule::Kernel, 115};
 constexpr ResultCode ResultInvalidCombination{ErrorModule::Kernel, 116};
 constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117};
 constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118};
+constexpr ResultCode ResultOutOfRange{ErrorModule::Kernel, 119};
 constexpr ResultCode ResultInvalidEnumValue{ErrorModule::Kernel, 120};
+constexpr ResultCode ResultNotFound{ErrorModule::Kernel, 121};
 constexpr ResultCode ResultBusy{ErrorModule::Kernel, 122};
+constexpr ResultCode ResultSessionClosedByRemote{ErrorModule::Kernel, 123};
 constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125};
+constexpr ResultCode ResultReservedValue{ErrorModule::Kernel, 126};
+constexpr ResultCode ResultResourceLimitedExceeded{ErrorModule::Kernel, 132};

-} // namespace Kernel::Svc
+} // namespace Kernel
--- a/src/core/hle/kernel/transfer_memory.cpp
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include "core/hle/kernel/k_resource_limit.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/process.h"
@@ -17,6 +18,7 @@ TransferMemory::TransferMemory(KernelCore& kernel, Core::Memory::Memory& memory)
 TransferMemory::~TransferMemory() {
    // Release memory region when transfer memory is destroyed
    Reset();
+    owner_process->GetResourceLimit()->Release(LimitableResource::TransferMemory, 1);
 }

 std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel,
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -1047,20 +1047,21 @@ void IStorageAccessor::Write(Kernel::HLERequestContext& ctx) {

    const u64 offset{rp.Pop<u64>()};
    const std::vector<u8> data{ctx.ReadBuffer()};
+    const std::size_t size{std::min(data.size(), backing.GetSize() - offset)};

-    LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, data.size());
+    LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size);

-    if (data.size() > backing.GetSize() - offset) {
+    if (offset > backing.GetSize()) {
        LOG_ERROR(Service_AM,
                  "offset is out of bounds, backing_buffer_sz={}, data_size={}, offset={}",
-                  backing.GetSize(), data.size(), offset);
+                  backing.GetSize(), size, offset);

        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(ERR_SIZE_OUT_OF_BOUNDS);
        return;
    }

-    std::memcpy(backing.GetData().data() + offset, data.data(), data.size());
+    std::memcpy(backing.GetData().data() + offset, data.data(), size);

    IPC::ResponseBuilder rb{ctx, 2};
    rb.Push(RESULT_SUCCESS);
@@ -1070,11 +1071,11 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {
    IPC::RequestParser rp{ctx};

    const u64 offset{rp.Pop<u64>()};
-    const std::size_t size{ctx.GetWriteBufferSize()};
+    const std::size_t size{std::min(ctx.GetWriteBufferSize(), backing.GetSize() - offset)};

    LOG_DEBUG(Service_AM, "called, offset={}, size={}", offset, size);

-    if (size > backing.GetSize() - offset) {
+    if (offset > backing.GetSize()) {
        LOG_ERROR(Service_AM, "offset is out of bounds, backing_buffer_sz={}, size={}, offset={}",
                  backing.GetSize(), size, offset);

--- a/src/core/hle/service/am/applets/controller.cpp
+++ b/src/core/hle/service/am/applets/controller.cpp
@@ -211,7 +211,8 @@ void Controller::Execute() {
    case ControllerSupportMode::ShowControllerFirmwareUpdate:
        UNIMPLEMENTED_MSG("ControllerSupportMode={} is not implemented",
                          controller_private_arg.mode);
-        [[fallthrough]];
+        ConfigurationComplete();
+        break;
    default: {
        ConfigurationComplete();
        break;
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -21,6 +21,7 @@

 namespace Service::HID {
 constexpr s32 HID_JOYSTICK_MAX = 0x7fff;
+constexpr s32 HID_TRIGGER_MAX = 0x7fff;
 [[maybe_unused]] constexpr s32 HID_JOYSTICK_MIN = -0x7fff;
 constexpr std::size_t NPAD_OFFSET = 0x9A00;
 constexpr u32 BATTERY_FULL = 2;
@@ -48,6 +49,8 @@ Controller_NPad::NPadControllerType Controller_NPad::MapSettingsTypeToNPad(
        return NPadControllerType::JoyRight;
    case Settings::ControllerType::Handheld:
        return NPadControllerType::Handheld;
+    case Settings::ControllerType::GameCube:
+        return NPadControllerType::GameCube;
    default:
        UNREACHABLE();
        return NPadControllerType::ProController;
@@ -67,6 +70,8 @@ Settings::ControllerType Controller_NPad::MapNPadToSettingsType(
        return Settings::ControllerType::RightJoycon;
    case NPadControllerType::Handheld:
        return Settings::ControllerType::Handheld;
+    case NPadControllerType::GameCube:
+        return Settings::ControllerType::GameCube;
    default:
        UNREACHABLE();
        return Settings::ControllerType::ProController;
@@ -209,6 +214,13 @@ void Controller_NPad::InitNewlyAddedController(std::size_t controller_idx) {
        controller.assignment_mode = NpadAssignments::Single;
        controller.footer_type = AppletFooterUiType::JoyRightHorizontal;
        break;
+    case NPadControllerType::GameCube:
+        controller.style_set.gamecube.Assign(1);
+        // The GC Controller behaves like a wired Pro Controller
+        controller.device_type.fullkey.Assign(1);
+        controller.system_properties.is_vertical.Assign(1);
+        controller.system_properties.use_plus.Assign(1);
+        break;
    case NPadControllerType::Pokeball:
        controller.style_set.palma.Assign(1);
        controller.device_type.palma.Assign(1);
@@ -259,6 +271,7 @@ void Controller_NPad::OnInit() {
        style.joycon_right.Assign(1);
        style.joycon_dual.Assign(1);
        style.fullkey.Assign(1);
+        style.gamecube.Assign(1);
        style.palma.Assign(1);
    }

@@ -339,6 +352,7 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
    auto& pad_state = npad_pad_states[controller_idx].pad_states;
    auto& lstick_entry = npad_pad_states[controller_idx].l_stick;
    auto& rstick_entry = npad_pad_states[controller_idx].r_stick;
+    auto& trigger_entry = npad_trigger_states[controller_idx];
    const auto& button_state = buttons[controller_idx];
    const auto& analog_state = sticks[controller_idx];
    const auto [stick_l_x_f, stick_l_y_f] =
@@ -404,6 +418,17 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
        pad_state.left_sl.Assign(button_state[SL - BUTTON_HID_BEGIN]->GetStatus());
        pad_state.left_sr.Assign(button_state[SR - BUTTON_HID_BEGIN]->GetStatus());
    }
+
+    if (controller_type == NPadControllerType::GameCube) {
+        trigger_entry.l_analog = static_cast<s32>(
+            button_state[ZL - BUTTON_HID_BEGIN]->GetStatus() ? HID_TRIGGER_MAX : 0);
+        trigger_entry.r_analog = static_cast<s32>(
+            button_state[ZR - BUTTON_HID_BEGIN]->GetStatus() ? HID_TRIGGER_MAX : 0);
+        pad_state.zl.Assign(false);
+        pad_state.zr.Assign(button_state[R - BUTTON_HID_BEGIN]->GetStatus());
+        pad_state.l.Assign(button_state[ZL - BUTTON_HID_BEGIN]->GetStatus());
+        pad_state.r.Assign(button_state[ZR - BUTTON_HID_BEGIN]->GetStatus());
+    }
 }

 void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
@@ -418,6 +443,11 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
            &npad.joy_left_states,  &npad.joy_right_states, &npad.palma_states,
            &npad.system_ext_states};

+        // There is the possibility to have more controllers with analog triggers
+        const std::array<TriggerGeneric*, 1> controller_triggers{
+            &npad.gc_trigger_states,
+        };
+
        for (auto* main_controller : controller_npads) {
            main_controller->common.entry_count = 16;
            main_controller->common.total_entry_count = 17;
@@ -435,6 +465,21 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
            cur_entry.timestamp2 = cur_entry.timestamp;
        }

+        for (auto* analog_trigger : controller_triggers) {
+            analog_trigger->entry_count = 16;
+            analog_trigger->total_entry_count = 17;
+
+            const auto& last_entry = analog_trigger->trigger[analog_trigger->last_entry_index];
+
+            analog_trigger->timestamp = core_timing.GetCPUTicks();
+            analog_trigger->last_entry_index = (analog_trigger->last_entry_index + 1) % 17;
+
+            auto& cur_entry = analog_trigger->trigger[analog_trigger->last_entry_index];
+
+            cur_entry.timestamp = last_entry.timestamp + 1;
+            cur_entry.timestamp2 = cur_entry.timestamp;
+        }
+
        const auto& controller_type = connected_controllers[i].type;

        if (controller_type == NPadControllerType::None || !connected_controllers[i].is_connected) {
@@ -444,6 +489,7 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*

        RequestPadStateUpdate(npad_index);
        auto& pad_state = npad_pad_states[npad_index];
+        auto& trigger_state = npad_trigger_states[npad_index];

        auto& main_controller =
            npad.fullkey_states.npad[npad.fullkey_states.common.last_entry_index];
@@ -456,6 +502,8 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
        auto& pokeball_entry = npad.palma_states.npad[npad.palma_states.common.last_entry_index];
        auto& libnx_entry =
            npad.system_ext_states.npad[npad.system_ext_states.common.last_entry_index];
+        auto& trigger_entry =
+            npad.gc_trigger_states.trigger[npad.gc_trigger_states.last_entry_index];

        libnx_entry.connection_status.raw = 0;
        libnx_entry.connection_status.is_connected.Assign(1);
@@ -524,6 +572,18 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*

            libnx_entry.connection_status.is_right_connected.Assign(1);
            break;
+        case NPadControllerType::GameCube:
+            main_controller.connection_status.raw = 0;
+            main_controller.connection_status.is_connected.Assign(1);
+            main_controller.connection_status.is_wired.Assign(1);
+            main_controller.pad.pad_states.raw = pad_state.pad_states.raw;
+            main_controller.pad.l_stick = pad_state.l_stick;
+            main_controller.pad.r_stick = pad_state.r_stick;
+            trigger_entry.l_analog = trigger_state.l_analog;
+            trigger_entry.r_analog = trigger_state.r_analog;
+
+            libnx_entry.connection_status.is_wired.Assign(1);
+            break;
        case NPadControllerType::Pokeball:
            pokeball_entry.connection_status.raw = 0;
            pokeball_entry.connection_status.is_connected.Assign(1);
@@ -674,6 +734,7 @@ void Controller_NPad::OnMotionUpdate(const Core::Timing::CoreTiming& core_timing
                right_sixaxis_entry.orientation = motion_devices[1].orientation;
            }
            break;
+        case NPadControllerType::GameCube:
        case NPadControllerType::Pokeball:
            break;
        }
@@ -1135,6 +1196,8 @@ bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const
            return style.joycon_left;
        case NPadControllerType::JoyRight:
            return style.joycon_right;
+        case NPadControllerType::GameCube:
+            return style.gamecube;
        case NPadControllerType::Pokeball:
            return style.palma;
        default:
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -51,6 +51,7 @@ public:
        JoyDual,
        JoyLeft,
        JoyRight,
+        GameCube,
        Pokeball,
    };

@@ -60,6 +61,7 @@ public:
        JoyconDual = 5,
        JoyconLeft = 6,
        JoyconRight = 7,
+        GameCube = 8,
        Pokeball = 9,
        MaxNpadType = 10,
    };
@@ -389,6 +391,25 @@ private:
    };
    static_assert(sizeof(SixAxisGeneric) == 0x708, "SixAxisGeneric is an invalid size");

+    struct TriggerState {
+        s64_le timestamp{};
+        s64_le timestamp2{};
+        s32_le l_analog{};
+        s32_le r_analog{};
+    };
+    static_assert(sizeof(TriggerState) == 0x18, "TriggerState is an invalid size");
+
+    struct TriggerGeneric {
+        INSERT_PADDING_BYTES(0x4);
+        s64_le timestamp;
+        INSERT_PADDING_BYTES(0x4);
+        s64_le total_entry_count;
+        s64_le last_entry_index;
+        s64_le entry_count;
+        std::array<TriggerState, 17> trigger{};
+    };
+    static_assert(sizeof(TriggerGeneric) == 0x1C8, "TriggerGeneric is an invalid size");
+
    struct NPadSystemProperties {
        union {
            s64_le raw{};
@@ -509,7 +530,9 @@ private:
        AppletFooterUiType footer_type;
        // nfc_states needs to be checked switchbrew does not match with HW
        NfcXcdHandle nfc_states;
-        INSERT_PADDING_BYTES(0xdef);
+        INSERT_PADDING_BYTES(0x8); // Mutex
+        TriggerGeneric gc_trigger_states;
+        INSERT_PADDING_BYTES(0xc1f);
    };
    static_assert(sizeof(NPadEntry) == 0x5000, "NPadEntry is an invalid size");

@@ -560,6 +583,7 @@ private:
    f32 sixaxis_fusion_parameter2{};
    bool sixaxis_at_rest{true};
    std::array<ControllerPad, 10> npad_pad_states{};
+    std::array<TriggerState, 10> npad_trigger_states{};
    bool is_in_lr_assignment_mode{false};
    Core::System& system;
 };
--- a/src/core/hle/service/ldn/errors.h
+++ b/src/core/hle/service/ldn/errors.h
@@ -0,0 +1,13 @@
+// Copyright 2021 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/hle/result.h"
+
+namespace Service::LDN {
+
+constexpr ResultCode ERROR_DISABLED{ErrorModule::LDN, 22};
+
+} // namespace Service::LDN
--- a/src/core/hle/service/ldn/ldn.cpp
+++ b/src/core/hle/service/ldn/ldn.cpp
@@ -6,6 +6,7 @@

 #include "core/hle/ipc_helpers.h"
 #include "core/hle/result.h"
+#include "core/hle/service/ldn/errors.h"
 #include "core/hle/service/ldn/ldn.h"
 #include "core/hle/service/sm/sm.h"

@@ -103,7 +104,7 @@ public:
        : ServiceFramework{system_, "IUserLocalCommunicationService"} {
        // clang-format off
        static const FunctionInfo functions[] = {
-            {0, nullptr, "GetState"},
+            {0, &IUserLocalCommunicationService::GetState, "GetState"},
            {1, nullptr, "GetNetworkInfo"},
            {2, nullptr, "GetIpv4Address"},
            {3, nullptr, "GetDisconnectReason"},
@@ -138,13 +139,38 @@ public:
        RegisterHandlers(functions);
    }

-    void Initialize2(Kernel::HLERequestContext& ctx) {
+    void GetState(Kernel::HLERequestContext& ctx) {
        LOG_WARNING(Service_LDN, "(STUBBED) called");
-        // Result success seem make this services start network and continue.
-        // If we just pass result error then it will stop and maybe try again and again.
-        IPC::ResponseBuilder rb{ctx, 2};
-        rb.Push(RESULT_UNKNOWN);
+
+        // Indicate a network error, as we do not actually emulate LDN
+        const auto state = static_cast<u32>(State::Error);
+
+        IPC::ResponseBuilder rb{ctx, 3};
+        rb.Push(RESULT_SUCCESS); // Result code first, then the output state
+        rb.Push(state);
    }
+
+    void Initialize2(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Service_LDN, "called");
+
+        is_initialized = true;
+
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(RESULT_SUCCESS);
+    }
+
+private:
+    enum class State {
+        None,
+        Initialized,
+        AccessPointOpened,
+        AccessPointCreated,
+        StationOpened,
+        StationConnected,
+        Error,
+    };
+
+    bool is_initialized{};
 };

 class LDNS final : public ServiceFramework<LDNS> {
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -11,10 +11,10 @@
 #include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/memory/page_table.h"
 #include "core/hle/kernel/memory/system_control.h"
 #include "core/hle/kernel/process.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/service/ldr/ldr.h"
 #include "core/hle/service/service.h"
 #include "core/loader/nro.h"
@@ -330,7 +330,7 @@ public:
            const VAddr addr{GetRandomMapRegion(page_table, size)};
            const ResultCode result{page_table.MapProcessCodeMemory(addr, baseAddress, size)};

-            if (result == Kernel::ERR_INVALID_ADDRESS_STATE) {
+            if (result == Kernel::ResultInvalidCurrentMemory) {
                continue;
            }

@@ -361,7 +361,7 @@ public:
                const ResultCode result{
                    page_table.MapProcessCodeMemory(addr + nro_size, bss_addr, bss_size)};

-                if (result == Kernel::ERR_INVALID_ADDRESS_STATE) {
+                if (result == Kernel::ResultInvalidCurrentMemory) {
                    continue;
                }

--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -215,7 +215,7 @@ private:
        const auto& amiibo = nfp_interface.GetAmiiboBuffer();
        const TagInfo tag_info{
            .uuid = amiibo.uuid,
-            .uuid_length = static_cast<u8>(tag_info.uuid.size()),
+            .uuid_length = static_cast<u8>(amiibo.uuid.size()),
            .padding_1 = {},
            .protocol = 1, // TODO(ogniK): Figure out actual values
            .tag_type = 2,
--- a/src/core/hle/service/sockets/bsd.cpp
+++ b/src/core/hle/service/sockets/bsd.cpp
@@ -453,7 +453,8 @@ std::pair<s32, Errno> BSD::SocketImpl(Domain domain, Type type, Protocol protoco
        return {-1, Errno::MFILE};
    }

-    FileDescriptor& descriptor = file_descriptors[fd].emplace();
+    file_descriptors[fd] = FileDescriptor{};
+    FileDescriptor& descriptor = *file_descriptors[fd];
    // ENONMEM might be thrown here

    LOG_INFO(Service, "New socket fd={}", fd);
@@ -548,7 +549,8 @@ std::pair<s32, Errno> BSD::AcceptImpl(s32 fd, std::vector<u8>& write_buffer) {
        return {-1, Translate(bsd_errno)};
    }

-    FileDescriptor& new_descriptor = file_descriptors[new_fd].emplace();
+    file_descriptors[new_fd] = FileDescriptor{};
+    FileDescriptor& new_descriptor = *file_descriptors[new_fd];
    new_descriptor.socket = std::move(result.socket);
    new_descriptor.is_connection_based = descriptor.is_connection_based;

--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -181,12 +181,13 @@ struct Values {
    std::string motion_device;
    std::string udp_input_servers;

-    bool emulate_analog_keyboard;
-
+    bool mouse_panning;
+    float mouse_panning_sensitivity;
    bool mouse_enabled;
    std::string mouse_device;
    MouseButtonsRaw mouse_buttons;

+    bool emulate_analog_keyboard;
    bool keyboard_enabled;
    KeyboardKeysRaw keyboard_keys;
    KeyboardModsRaw keyboard_mods;
--- a/src/input_common/mouse/mouse_input.cpp
+++ b/src/input_common/mouse/mouse_input.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2+
 // Refer to the license.txt file included.

+#include "core/settings.h"
 #include "input_common/mouse/mouse_input.h"

 namespace MouseInput {
@@ -32,10 +33,18 @@ void Mouse::UpdateThread() {
            info.motion.UpdateOrientation(update_time * 1000);
            info.tilt_speed = 0;
            info.data.motion = info.motion.GetMotion();
+            if (Settings::values.mouse_panning) {
+                info.last_mouse_change *= 0.96f;
+                info.data.axis = {static_cast<int>(16 * info.last_mouse_change.x),
+                                  static_cast<int>(16 * -info.last_mouse_change.y)};
+            }
        }
        if (configuring) {
            UpdateYuzuSettings();
        }
+        if (mouse_panning_timout++ > 20) {
+            StopPanning();
+        }
        std::this_thread::sleep_for(std::chrono::milliseconds(update_time));
    }
 }
@@ -65,8 +74,45 @@ void Mouse::PressButton(int x, int y, int button_) {
    mouse_info[button_index].data.pressed = true;
 }

-void Mouse::MouseMove(int x, int y) {
+void Mouse::StopPanning() {
    for (MouseInfo& info : mouse_info) {
+        if (Settings::values.mouse_panning) {
+            info.data.axis = {};
+            info.tilt_speed = 0;
+            info.last_mouse_change = {};
+        }
+    }
+}
+
+void Mouse::MouseMove(int x, int y, int center_x, int center_y) {
+    for (MouseInfo& info : mouse_info) {
+        if (Settings::values.mouse_panning) {
+            auto mouse_change =
+                (Common::MakeVec(x, y) - Common::MakeVec(center_x, center_y)).Cast<float>();
+            mouse_panning_timout = 0;
+
+            if (mouse_change.y == 0 && mouse_change.x == 0) {
+                continue;
+            }
+            const auto mouse_change_length = mouse_change.Length();
+            if (mouse_change_length < 3.0f) {
+                mouse_change /= mouse_change_length / 3.0f;
+            }
+
+            info.last_mouse_change = (info.last_mouse_change * 0.91f) + (mouse_change * 0.09f);
+
+            const auto last_mouse_change_length = info.last_mouse_change.Length();
+            if (last_mouse_change_length > 8.0f) {
+                info.last_mouse_change /= last_mouse_change_length / 8.0f;
+            } else if (last_mouse_change_length < 1.0f) {
+                info.last_mouse_change = mouse_change / mouse_change.Length();
+            }
+
+            info.tilt_direction = info.last_mouse_change;
+            info.tilt_speed = info.tilt_direction.Normalize() * info.sensitivity;
+            continue;
+        }
+
        if (info.data.pressed) {
            const auto mouse_move = Common::MakeVec(x, y) - info.mouse_origin;
            const auto mouse_change = Common::MakeVec(x, y) - info.last_mouse_position;
--- a/src/input_common/mouse/mouse_input.h
+++ b/src/input_common/mouse/mouse_input.h
@@ -57,8 +57,10 @@ public:
     * Signals that mouse has moved.
     * @param x the x-coordinate of the cursor
     * @param y the y-coordinate of the cursor
+     * @param center_x the x-coordinate of the middle of the screen
+     * @param center_y the y-coordinate of the middle of the screen
     */
-    void MouseMove(int x, int y);
+    void MouseMove(int x, int y, int center_x, int center_y);

    /**
     * Signals that a motion sensor tilt has ended.
@@ -74,11 +76,13 @@ public:
 private:
    void UpdateThread();
    void UpdateYuzuSettings();
+    void StopPanning();

    struct MouseInfo {
        InputCommon::MotionInput motion{0.0f, 0.0f, 0.0f};
        Common::Vec2<int> mouse_origin;
        Common::Vec2<int> last_mouse_position;
+        Common::Vec2<float> last_mouse_change;
        bool is_tilting = false;
        float sensitivity{0.120f};

@@ -94,5 +98,6 @@ private:
    Common::SPSCQueue<MouseStatus> mouse_queue;
    bool configuring{false};
    bool update_thread_running{true};
+    int mouse_panning_timout{};
 };
 } // namespace MouseInput
--- a/src/input_common/mouse/mouse_poller.cpp
+++ b/src/input_common/mouse/mouse_poller.cpp
@@ -6,6 +6,7 @@
 #include <utility>

 #include "common/threadsafe_queue.h"
+#include "core/settings.h"
 #include "input_common/mouse/mouse_input.h"
 #include "input_common/mouse/mouse_poller.h"

@@ -71,7 +72,7 @@ public:
        std::lock_guard lock{mutex};
        const auto axis_value =
            static_cast<float>(mouse_input->GetMouseState(button).axis.at(axis));
-        return axis_value / (100.0f * range);
+        return axis_value * Settings::values.mouse_panning_sensitivity / (100.0f * range);
    }

    std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const {
--- a/src/input_common/sdl/sdl_impl.cpp
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -717,6 +717,13 @@ SDLState::SDLState() {
    if (SDL_SetHint(SDL_HINT_JOYSTICK_ALLOW_BACKGROUND_EVENTS, "1") == SDL_FALSE) {
        LOG_ERROR(Input, "Failed to set hint for background events with: {}", SDL_GetError());
    }
+// these hints are only defined on sdl2.0.9 or higher
+#if SDL_VERSION_ATLEAST(2, 0, 9)
+#if !SDL_VERSION_ATLEAST(2, 0, 12)
+    // There are also hints to toggle the individual drivers if needed.
+    SDL_SetHint(SDL_HINT_JOYSTICK_HIDAPI, "0");
+#endif
+#endif

    SDL_AddEventWatch(&SDLEventWatcher, this);

--- a/src/input_common/settings.h
+++ b/src/input_common/settings.h
@@ -340,6 +340,7 @@ enum class ControllerType {
    LeftJoycon,
    RightJoycon,
    Handheld,
+    GameCube,
 };

 struct PlayerInput {
--- a/src/tests/video_core/buffer_base.cpp
+++ b/src/tests/video_core/buffer_base.cpp
@@ -471,3 +471,79 @@ TEST_CASE("BufferBase: Unaligned page region query") {
    REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000));
    REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1));
 }
+
+TEST_CASE("BufferBase: Cached write") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    buffer.CachedCpuWrite(c + PAGE, PAGE);
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    buffer.FlushCachedWrites();
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    buffer.MarkRegionAsCpuModified(c, WORD);
+    REQUIRE(rasterizer.Count() == 0);
+}
+
+TEST_CASE("BufferBase: Multiple cached write") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    buffer.CachedCpuWrite(c + PAGE, PAGE);
+    buffer.CachedCpuWrite(c + PAGE * 3, PAGE);
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
+    buffer.FlushCachedWrites();
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
+    buffer.MarkRegionAsCpuModified(c, WORD);
+    REQUIRE(rasterizer.Count() == 0);
+}
+
+TEST_CASE("BufferBase: Cached write unmarked") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    buffer.CachedCpuWrite(c + PAGE, PAGE);
+    buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE);
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    buffer.FlushCachedWrites();
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    buffer.MarkRegionAsCpuModified(c, WORD);
+    REQUIRE(rasterizer.Count() == 0);
+}
+
+TEST_CASE("BufferBase: Cached write iterated") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    buffer.CachedCpuWrite(c + PAGE, PAGE);
+    int num = 0;
+    buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
+    REQUIRE(num == 0);
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    buffer.FlushCachedWrites();
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    buffer.MarkRegionAsCpuModified(c, WORD);
+    REQUIRE(rasterizer.Count() == 0);
+}
+
+TEST_CASE("BufferBase: Cached write downloads") {
+    RasterizerInterface rasterizer;
+    BufferBase buffer(rasterizer, c, WORD);
+    buffer.UnmarkRegionAsCpuModified(c, WORD);
+    REQUIRE(rasterizer.Count() == 64);
+    buffer.CachedCpuWrite(c + PAGE, PAGE);
+    REQUIRE(rasterizer.Count() == 63);
+    buffer.MarkRegionAsGpuModified(c + PAGE, PAGE);
+    int num = 0;
+    buffer.ForEachDownloadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
+    buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
+    REQUIRE(num == 0);
+    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
+    buffer.FlushCachedWrites();
+    REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
+    REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
+    buffer.MarkRegionAsCpuModified(c, WORD);
+    REQUIRE(rasterizer.Count() == 0);
+}
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -2,10 +2,8 @@ add_subdirectory(host_shaders)

 add_library(video_core STATIC
    buffer_cache/buffer_base.h
-    buffer_cache/buffer_block.h
+    buffer_cache/buffer_cache.cpp
    buffer_cache/buffer_cache.h
-    buffer_cache/map_interval.cpp
-    buffer_cache/map_interval.h
    cdma_pusher.cpp
    cdma_pusher.h
    command_classes/codecs/codec.cpp
@@ -152,8 +150,6 @@ add_library(video_core STATIC
    renderer_vulkan/vk_staging_buffer_pool.h
    renderer_vulkan/vk_state_tracker.cpp
    renderer_vulkan/vk_state_tracker.h
-    renderer_vulkan/vk_stream_buffer.cpp
-    renderer_vulkan/vk_stream_buffer.h
    renderer_vulkan/vk_swapchain.cpp
    renderer_vulkan/vk_swapchain.h
    renderer_vulkan/vk_texture_cache.cpp
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -19,6 +19,7 @@ namespace VideoCommon {

 enum class BufferFlagBits {
    Picked = 1 << 0,
+    CachedWrites = 1 << 1,
 };
 DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits)

@@ -40,7 +41,7 @@ class BufferBase {
    static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;

    /// Vector tracking modified pages tightly packed with small vector optimization
-    union WrittenWords {
+    union WordsArray {
        /// Returns the pointer to the words state
        [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
            return is_short ? &stack : heap;
@@ -55,49 +56,59 @@ class BufferBase {
        u64* heap;     ///< Not-small buffers pointer to the storage
    };

-    struct GpuCpuWords {
-        explicit GpuCpuWords() = default;
-        explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} {
+    struct Words {
+        explicit Words() = default;
+        explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
            if (IsShort()) {
                cpu.stack = ~u64{0};
                gpu.stack = 0;
+                cached_cpu.stack = 0;
+                untracked.stack = ~u64{0};
            } else {
                // Share allocation between CPU and GPU pages and set their default values
                const size_t num_words = NumWords();
-                u64* const alloc = new u64[num_words * 2];
+                u64* const alloc = new u64[num_words * 4];
                cpu.heap = alloc;
                gpu.heap = alloc + num_words;
+                cached_cpu.heap = alloc + num_words * 2;
+                untracked.heap = alloc + num_words * 3;
                std::fill_n(cpu.heap, num_words, ~u64{0});
                std::fill_n(gpu.heap, num_words, 0);
+                std::fill_n(cached_cpu.heap, num_words, 0);
+                std::fill_n(untracked.heap, num_words, ~u64{0});
            }
            // Clean up tailing bits
-            const u64 last_local_page =
-                Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE);
+            const u64 last_word_size = size_bytes % BYTES_PER_WORD;
+            const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
            const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
-            u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1];
-            last_word = (last_word << shift) >> shift;
+            const u64 last_word = (~u64{0} << shift) >> shift;
+            cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
+            untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
        }

-        ~GpuCpuWords() {
+        ~Words() {
            Release();
        }

-        GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept {
+        Words& operator=(Words&& rhs) noexcept {
            Release();
            size_bytes = rhs.size_bytes;
            cpu = rhs.cpu;
            gpu = rhs.gpu;
+            cached_cpu = rhs.cached_cpu;
+            untracked = rhs.untracked;
            rhs.cpu.heap = nullptr;
            return *this;
        }

-        GpuCpuWords(GpuCpuWords&& rhs) noexcept
-            : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} {
+        Words(Words&& rhs) noexcept
+            : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu},
+              cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
            rhs.cpu.heap = nullptr;
        }

-        GpuCpuWords& operator=(const GpuCpuWords&) = delete;
-        GpuCpuWords(const GpuCpuWords&) = delete;
+        Words& operator=(const Words&) = delete;
+        Words(const Words&) = delete;

        /// Returns true when the buffer fits in the small vector optimization
        [[nodiscard]] bool IsShort() const noexcept {
@@ -118,8 +129,17 @@ class BufferBase {
        }

        u64 size_bytes = 0;
-        WrittenWords cpu;
-        WrittenWords gpu;
+        WordsArray cpu;
+        WordsArray gpu;
+        WordsArray cached_cpu;
+        WordsArray untracked;
+    };
+
+    enum class Type {
+        CPU,
+        GPU,
+        CachedCPU,
+        Untracked,
    };

 public:
@@ -132,68 +152,93 @@ public:
    BufferBase& operator=(const BufferBase&) = delete;
    BufferBase(const BufferBase&) = delete;

+    BufferBase& operator=(BufferBase&&) = default;
+    BufferBase(BufferBase&&) = default;
+
    /// Returns the inclusive CPU modified range in a begin end pair
    [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
                                                        u64 query_size) const noexcept {
        const u64 offset = query_cpu_addr - cpu_addr;
-        return ModifiedRegion<false>(offset, query_size);
+        return ModifiedRegion<Type::CPU>(offset, query_size);
    }

    /// Returns the inclusive GPU modified range in a begin end pair
    [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
                                                        u64 query_size) const noexcept {
        const u64 offset = query_cpu_addr - cpu_addr;
-        return ModifiedRegion<true>(offset, query_size);
+        return ModifiedRegion<Type::GPU>(offset, query_size);
    }

    /// Returns true if a region has been modified from the CPU
    [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
        const u64 offset = query_cpu_addr - cpu_addr;
-        return IsRegionModified<false>(offset, query_size);
+        return IsRegionModified<Type::CPU>(offset, query_size);
    }

    /// Returns true if a region has been modified from the GPU
    [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
        const u64 offset = query_cpu_addr - cpu_addr;
-        return IsRegionModified<true>(offset, query_size);
+        return IsRegionModified<Type::GPU>(offset, query_size);
    }

    /// Mark region as CPU modified, notifying the rasterizer about this change
    void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
-        ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size);
+        ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size);
    }

    /// Unmark region as CPU modified, notifying the rasterizer about this change
    void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
-        ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size);
+        ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size);
    }

    /// Mark region as modified from the host GPU
    void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
-        ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size);
+        ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size);
    }

    /// Unmark region as modified from the host GPU
    void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
-        ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size);
+        ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size);
+    }
+
+    /// Record a CPU write to the region as a cached write; the region
+    /// is not marked as CPU modified until FlushCachedWrites is called.
+    void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) {
+        flags |= BufferFlagBits::CachedWrites;
+        ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size);
+    }
+
+    /// Flushes cached CPU writes and notifies the rasterizer about the deltas
+    void FlushCachedWrites() noexcept {
+        flags &= ~BufferFlagBits::CachedWrites;
+        const u64 num_words = NumWords();
+        const u64* const cached_words = Array<Type::CachedCPU>();
+        u64* const untracked_words = Array<Type::Untracked>();
+        u64* const cpu_words = Array<Type::CPU>();
+        for (u64 word_index = 0; word_index < num_words; ++word_index) {
+            const u64 cached_bits = cached_words[word_index];
+            NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
+            untracked_words[word_index] |= cached_bits;
+            cpu_words[word_index] |= cached_bits;
+        }
    }

    /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
    template <typename Func>
    void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
-        ForEachModifiedRange<false, true>(query_cpu_range, size, func);
+        ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func);
    }

    /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
    template <typename Func>
    void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
-        ForEachModifiedRange<true, false>(query_cpu_range, size, func);
+        ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func);
    }

    /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
    template <typename Func>
    void ForEachDownloadRange(Func&& func) {
-        ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func);
+        ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func);
    }

    /// Mark buffer as picked
@@ -206,6 +251,16 @@ public:
        flags &= ~BufferFlagBits::Picked;
    }

+    /// Increases the likeliness of this being a stream buffer
+    void IncreaseStreamScore(int score) noexcept {
+        stream_score += score;
+    }
+
+    /// Returns the likeliness of this being a stream buffer
+    [[nodiscard]] int StreamScore() const noexcept {
+        return stream_score;
+    }
+
    /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
    [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
        return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
@@ -216,6 +271,11 @@ public:
        return True(flags & BufferFlagBits::Picked);
    }

+    /// Returns true when the buffer has pending cached writes
+    [[nodiscard]] bool HasCachedWrites() const noexcept {
+        return True(flags & BufferFlagBits::CachedWrites);
+    }
+
    /// Returns the base CPU address of the buffer
    [[nodiscard]] VAddr CpuAddr() const noexcept {
        return cpu_addr;
@@ -233,26 +293,48 @@ public:
    }

 private:
+    template <Type type>
+    u64* Array() noexcept {
+        if constexpr (type == Type::CPU) {
+            return words.cpu.Pointer(IsShort());
+        } else if constexpr (type == Type::GPU) {
+            return words.gpu.Pointer(IsShort());
+        } else if constexpr (type == Type::CachedCPU) {
+            return words.cached_cpu.Pointer(IsShort());
+        } else if constexpr (type == Type::Untracked) {
+            return words.untracked.Pointer(IsShort());
+        }
+    }
+
+    template <Type type>
+    const u64* Array() const noexcept {
+        if constexpr (type == Type::CPU) {
+            return words.cpu.Pointer(IsShort());
+        } else if constexpr (type == Type::GPU) {
+            return words.gpu.Pointer(IsShort());
+        } else if constexpr (type == Type::CachedCPU) {
+            return words.cached_cpu.Pointer(IsShort());
+        } else if constexpr (type == Type::Untracked) {
+            return words.untracked.Pointer(IsShort());
+        }
+    }
+
    /**
     * Change the state of a range of pages
     *
-     * @param written_words Pages to be marked or unmarked as modified
     * @param dirty_addr    Base address to mark or unmark as modified
     * @param size          Size in bytes to mark or unmark as modified
-     *
-     * @tparam enable            True when the bits will be set to one, false for zero
-     * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes
     */
-    template <bool enable, bool notify_rasterizer>
-    void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr,
-                           s64 size) noexcept(!notify_rasterizer) {
+    template <Type type, bool enable>
+    void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) {
        const s64 difference = dirty_addr - cpu_addr;
        const u64 offset = std::max<s64>(difference, 0);
        size += std::min<s64>(difference, 0);
        if (offset >= SizeBytes() || size < 0) {
            return;
        }
-        u64* const state_words = written_words.Pointer(IsShort());
+        u64* const untracked_words = Array<Type::Untracked>();
+        u64* const state_words = Array<type>();
        const u64 offset_end = std::min(offset + size, SizeBytes());
        const u64 begin_page_index = offset / BYTES_PER_PAGE;
        const u64 begin_word_index = begin_page_index / PAGES_PER_WORD;
@@ -268,13 +350,19 @@ private:
            u64 bits = ~u64{0};
            bits = (bits >> right_offset) << right_offset;
            bits = (bits << left_offset) >> left_offset;
-            if constexpr (notify_rasterizer) {
-                NotifyRasterizer<!enable>(word_index, state_words[word_index], bits);
+            if constexpr (type == Type::CPU || type == Type::CachedCPU) {
+                NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits);
            }
            if constexpr (enable) {
                state_words[word_index] |= bits;
+                if constexpr (type == Type::CPU || type == Type::CachedCPU) {
+                    untracked_words[word_index] |= bits;
+                }
            } else {
                state_words[word_index] &= ~bits;
+                if constexpr (type == Type::CPU || type == Type::CachedCPU) {
+                    untracked_words[word_index] &= ~bits;
+                }
            }
            page_index = 0;
            ++word_index;
@@ -291,7 +379,7 @@ private:
     * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
     */
    template <bool add_to_rasterizer>
-    void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) {
+    void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
        u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
        VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
        while (changed_bits != 0) {
@@ -315,21 +403,20 @@ private:
     * @param query_cpu_range Base CPU address to loop over
     * @param size            Size in bytes of the CPU range to loop over
     * @param func            Function to call for each turned off region
-     *
-     * @tparam gpu               True for host GPU pages, false for CPU pages
-     * @tparam notify_rasterizer True when the rasterizer should be notified about state changes
     */
-    template <bool gpu, bool notify_rasterizer, typename Func>
+    template <Type type, typename Func>
    void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
+        static_assert(type != Type::Untracked);
+
        const s64 difference = query_cpu_range - cpu_addr;
        const u64 query_begin = std::max<s64>(difference, 0);
        size += std::min<s64>(difference, 0);
        if (query_begin >= SizeBytes() || size < 0) {
            return;
        }
-        const u64* const cpu_words = words.cpu.Pointer(IsShort());
+        u64* const untracked_words = Array<Type::Untracked>();
+        u64* const state_words = Array<type>();
        const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
-        u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
        u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
        u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD);

@@ -345,7 +432,8 @@ private:
        const u64 word_index_end = std::distance(state_words, last_modified_word);

        const unsigned local_page_begin = std::countr_zero(*first_modified_word);
-        const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]);
+        const unsigned local_page_end =
+            static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]);
        const u64 word_page_begin = word_index_begin * PAGES_PER_WORD;
        const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD;
        const u64 query_page_begin = query_begin / BYTES_PER_PAGE;
@@ -371,11 +459,13 @@ private:
            const u64 current_word = state_words[word_index] & bits;
            state_words[word_index] &= ~bits;

-            // Exclude CPU modified pages when visiting GPU pages
-            const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0);
-            if constexpr (notify_rasterizer) {
-                NotifyRasterizer<true>(word_index, word, ~u64{0});
+            if constexpr (type == Type::CPU) {
+                const u64 current_bits = untracked_words[word_index] & bits;
+                untracked_words[word_index] &= ~bits;
+                NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
            }
+            // Exclude CPU modified pages when visiting GPU pages
+            const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
            u64 page = page_begin;
            page_begin = 0;

@@ -416,17 +506,20 @@ private:
     * @param offset Offset in bytes from the start of the buffer
     * @param size   Size in bytes of the region to query for modifications
     */
-    template <bool gpu>
+    template <Type type>
    [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
-        const u64* const cpu_words = words.cpu.Pointer(IsShort());
-        const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
+        static_assert(type != Type::Untracked);
+
+        const u64* const untracked_words = Array<Type::Untracked>();
+        const u64* const state_words = Array<type>();
        const u64 num_query_words = size / BYTES_PER_WORD + 1;
        const u64 word_begin = offset / BYTES_PER_WORD;
        const u64 word_end = std::min(word_begin + num_query_words, NumWords());
        const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
        u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
        for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
-            const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0);
+            const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
+            const u64 word = state_words[word_index] & ~off_word;
            if (word == 0) {
                continue;
            }
@@ -445,13 +538,13 @@ private:
     *
     * @param offset Offset in bytes from the start of the buffer
     * @param size   Size in bytes of the region to query for modifications
-     *
-     * @tparam gpu True to query GPU modified pages, false for CPU pages
     */
-    template <bool gpu>
+    template <Type type>
    [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
-        const u64* const cpu_words = words.cpu.Pointer(IsShort());
-        const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
+        static_assert(type != Type::Untracked);
+
+        const u64* const untracked_words = Array<Type::Untracked>();
+        const u64* const state_words = Array<type>();
        const u64 num_query_words = size / BYTES_PER_WORD + 1;
        const u64 word_begin = offset / BYTES_PER_WORD;
        const u64 word_end = std::min(word_begin + num_query_words, NumWords());
@@ -460,7 +553,8 @@ private:
        u64 begin = std::numeric_limits<u64>::max();
        u64 end = 0;
        for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
-            const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0);
+            const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
+            const u64 word = state_words[word_index] & ~off_word;
            if (word == 0) {
                continue;
            }
@@ -488,8 +582,9 @@ private:

    RasterizerInterface* rasterizer = nullptr;
    VAddr cpu_addr = 0;
-    GpuCpuWords words;
+    Words words;
    BufferFlagBits flags{};
+    int stream_score = 0;
 };

 } // namespace VideoCommon
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -1,62 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace VideoCommon {
-
-class BufferBlock {
-public:
-    [[nodiscard]] bool Overlaps(VAddr start, VAddr end) const {
-        return (cpu_addr < end) && (cpu_addr_end > start);
-    }
-
-    [[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const {
-        return cpu_addr <= other_start && other_end <= cpu_addr_end;
-    }
-
-    [[nodiscard]] std::size_t Offset(VAddr in_addr) const {
-        return static_cast<std::size_t>(in_addr - cpu_addr);
-    }
-
-    [[nodiscard]] VAddr CpuAddr() const {
-        return cpu_addr;
-    }
-
-    [[nodiscard]] VAddr CpuAddrEnd() const {
-        return cpu_addr_end;
-    }
-
-    void SetCpuAddr(VAddr new_addr) {
-        cpu_addr = new_addr;
-        cpu_addr_end = new_addr + size;
-    }
-
-    [[nodiscard]] std::size_t Size() const {
-        return size;
-    }
-
-    [[nodiscard]] u64 Epoch() const {
-        return epoch;
-    }
-
-    void SetEpoch(u64 new_epoch) {
-        epoch = new_epoch;
-    }
-
-protected:
-    explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
-        SetCpuAddr(cpu_addr_);
-    }
-
-private:
-    VAddr cpu_addr{};
-    VAddr cpu_addr_end{};
-    std::size_t size{};
-    u64 epoch{};
-};
-
-} // namespace VideoCommon
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -0,0 +1,13 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/microprofile.h"
+
+namespace VideoCommon {
+
+MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 128, 128));
+MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
+MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
+
+} // namespace VideoCommon
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
--- a/src/video_core/buffer_cache/map_interval.cpp
+++ b/src/video_core/buffer_cache/map_interval.cpp
@@ -1,33 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
-#include <memory>
-
-#include "video_core/buffer_cache/map_interval.h"
-
-namespace VideoCommon {
-
-MapIntervalAllocator::MapIntervalAllocator() {
-    FillFreeList(first_chunk);
-}
-
-MapIntervalAllocator::~MapIntervalAllocator() = default;
-
-void MapIntervalAllocator::AllocateNewChunk() {
-    *new_chunk = std::make_unique<Chunk>();
-    FillFreeList(**new_chunk);
-    new_chunk = &(*new_chunk)->next;
-}
-
-void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
-    const std::size_t old_size = free_list.size();
-    free_list.resize(old_size + chunk.data.size());
-    std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
-                   [](MapInterval& interval) { return &interval; });
-}
-
-} // namespace VideoCommon
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -1,93 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <cstddef>
-#include <memory>
-#include <vector>
-
-#include <boost/intrusive/set_hook.hpp>
-
-#include "common/common_types.h"
-#include "video_core/gpu.h"
-
-namespace VideoCommon {
-
-struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
-    MapInterval() = default;
-
-    /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
-
-    explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
-        : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
-
-    bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
-        return start <= other_start && other_end <= end;
-    }
-
-    bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
-        return start < other_end && other_start < end;
-    }
-
-    void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
-        is_modified = is_modified_;
-        ticks = ticks_;
-    }
-
-    boost::intrusive::set_member_hook<> member_hook_;
-    VAddr start = 0;
-    VAddr end = 0;
-    GPUVAddr gpu_addr = 0;
-    u64 ticks = 0;
-    bool is_written = false;
-    bool is_modified = false;
-    bool is_registered = false;
-    bool is_memory_marked = false;
-    bool is_sync_pending = false;
-};
-
-struct MapIntervalCompare {
-    constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
-        return lhs.start < rhs.start;
-    }
-};
-
-class MapIntervalAllocator {
-public:
-    MapIntervalAllocator();
-    ~MapIntervalAllocator();
-
-    MapInterval* Allocate() {
-        if (free_list.empty()) {
-            AllocateNewChunk();
-        }
-        MapInterval* const interval = free_list.back();
-        free_list.pop_back();
-        return interval;
-    }
-
-    void Release(MapInterval* interval) {
-        free_list.push_back(interval);
-    }
-
-private:
-    struct Chunk {
-        std::unique_ptr<Chunk> next;
-        std::array<MapInterval, 0x8000> data;
-    };
-
-    void AllocateNewChunk();
-
-    void FillFreeList(Chunk& chunk);
-
-    std::vector<MapInterval*> free_list;
-
-    Chunk first_chunk;
-
-    std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
-};
-
-} // namespace VideoCommon
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -110,12 +110,10 @@ void Vic::Execute() {
                                           converted_frame_buffer.get(), block_height, 0, 0);

            gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
-            gpu.Maxwell3D().OnMemoryWrite();
        } else {
            // send pitch linear frame
            gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
                                           linear_size);
-            gpu.Maxwell3D().OnMemoryWrite();
        }
        break;
    }
@@ -163,7 +161,6 @@ void Vic::Execute() {
        }
        gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
                                       chroma_buffer.size());
-        gpu.Maxwell3D().OnMemoryWrite();
        break;
    }
    default:
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -12,13 +12,30 @@
 #define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32)))

 namespace VideoCommon::Dirty {
-
+namespace {
 using Tegra::Engines::Maxwell3D;

-void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
+void SetupDirtyVertexBuffers(Maxwell3D::DirtyState::Tables& tables) {
+    static constexpr std::size_t num_array = 3;
+    static constexpr std::size_t num_limit = NUM(vertex_array_limit[0]); // one entry, not the array
+    for (std::size_t i = 0; i < Maxwell3D::Regs::NumVertexArrays; ++i) {
+        const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
+        const std::size_t limit_offset = OFF(vertex_array_limit) + i * num_limit;
+        FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
+        FillBlock(tables, limit_offset, num_limit, VertexBuffer0 + i, VertexBuffers);
+    }
+}
+
+void SetupIndexBuffer(Maxwell3D::DirtyState::Tables& tables) {
+    FillBlock(tables[0], OFF(index_array), NUM(index_array), IndexBuffer);
+}
+
+void SetupDirtyDescriptors(Maxwell3D::DirtyState::Tables& tables) {
    FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
    FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
+}

+void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) {
    static constexpr std::size_t num_per_rt = NUM(rt[0]);
    static constexpr std::size_t begin = OFF(rt);
    static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@@ -41,5 +58,13 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
        FillBlock(table, OFF(zeta), NUM(zeta), flag);
    }
 }
+} // Anonymous namespace
+
+void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) {
+    SetupDirtyVertexBuffers(tables);
+    SetupIndexBuffer(tables);
+    SetupDirtyDescriptors(tables);
+    SetupDirtyRenderTargets(tables);
+}

 } // namespace VideoCommon::Dirty
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -30,6 +30,12 @@ enum : u8 {
    ColorBuffer7,
    ZetaBuffer,

+    VertexBuffers,
+    VertexBuffer0,
+    VertexBuffer31 = VertexBuffer0 + 31,
+
+    IndexBuffer,
+
    LastCommonEntry,
 };

@@ -47,6 +53,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_
    FillBlock(tables[1], begin, num, index_b);
 }

-void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
+void SetupDirtyFlags(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);

 } // namespace VideoCommon::Dirty
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -23,8 +23,6 @@ void DmaPusher::DispatchCalls() {
    MICROPROFILE_SCOPE(DispatchCalls);

    gpu.SyncGuestHost();
-    // On entering GPU code, assume all memory may be touched by the ARM core.
-    gpu.Maxwell3D().OnMemoryWrite();

    dma_pushbuffer_subindex = 0;

--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -18,8 +18,8 @@ Fermi2D::Fermi2D() {

 Fermi2D::~Fermi2D() = default;

-void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
-    rasterizer = &rasterizer_;
+void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_;
 }

 void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -38,7 +38,7 @@ public:
    ~Fermi2D();

    /// Binds a rasterizer to this engine.
-    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);

    /// Write the value to the register identified by method.
    void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -21,8 +21,8 @@ KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manage

 KeplerCompute::~KeplerCompute() = default;

-void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
-    rasterizer = &rasterizer_;
+void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_;
 }

 void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
@@ -39,7 +39,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
    case KEPLER_COMPUTE_REG_INDEX(data_upload): {
        upload_state.ProcessData(method_argument, is_last_call);
        if (is_last_call) {
-            system.GPU().Maxwell3D().OnMemoryWrite();
        }
        break;
    }
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -46,7 +46,7 @@ public:
    ~KeplerCompute();

    /// Binds a rasterizer to this engine.
-    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);

    static constexpr std::size_t NumConstBuffers = 8;

--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -33,7 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
    case KEPLERMEMORY_REG_INDEX(data): {
        upload_state.ProcessData(method_argument, is_last_call);
        if (is_last_call) {
-            system.GPU().Maxwell3D().OnMemoryWrite();
        }
        break;
    }
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -30,8 +30,8 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)

 Maxwell3D::~Maxwell3D() = default;

-void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
-    rasterizer = &rasterizer_;
+void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_;
 }

 void Maxwell3D::InitializeRegisterDefaults() {
@@ -223,7 +223,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
    case MAXWELL3D_REG_INDEX(data_upload):
        upload_state.ProcessData(argument, is_last_call);
        if (is_last_call) {
-            OnMemoryWrite();
        }
        return;
    case MAXWELL3D_REG_INDEX(fragment_barrier):
@@ -570,17 +569,18 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
    }
 }

-void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
+void Maxwell3D::ProcessCBBind(size_t stage_index) {
    // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
-    auto& shader = state.shader_stages[stage_index];
-    auto& bind_data = regs.cb_bind[stage_index];
-
-    ASSERT(bind_data.index < Regs::MaxConstBuffers);
-    auto& buffer = shader.const_buffers[bind_data.index];
-
+    const auto& bind_data = regs.cb_bind[stage_index];
+    auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.index];
    buffer.enabled = bind_data.valid.Value() != 0;
    buffer.address = regs.const_buffer.BufferAddress();
    buffer.size = regs.const_buffer.cb_size;
+
+    const bool is_enabled = bind_data.valid.Value() != 0;
+    const GPUVAddr gpu_addr = is_enabled ? regs.const_buffer.BufferAddress() : 0;
+    const u32 size = is_enabled ? regs.const_buffer.cb_size : 0;
+    rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
 }

 void Maxwell3D::ProcessCBData(u32 value) {
@@ -635,7 +635,6 @@ void Maxwell3D::FinishCBData() {

    const u32 id = cb_data_state.id;
    memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
-    OnMemoryWrite();

    cb_data_state.id = null_cb_data;
    cb_data_state.current = null_cb_data;
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -55,7 +55,7 @@ public:
    ~Maxwell3D();

    /// Binds a rasterizer to this engine.
-    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);

    /// Register structure of the Maxwell3D engine.
    /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
@@ -1314,8 +1314,7 @@ public:

                    GPUVAddr LimitAddress() const {
                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) |
-                                                     limit_low) +
-                               1;
+                                                     limit_low);
                    }
                } vertex_array_limit[NumVertexArrays];

@@ -1403,6 +1402,7 @@ public:
        };

        std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
+
        u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
    };

@@ -1452,11 +1452,6 @@ public:
        return *rasterizer;
    }

-    /// Notify a memory write has happened.
-    void OnMemoryWrite() {
-        dirty.flags |= dirty.on_write_stores;
-    }
-
    enum class MMEDrawMode : u32 {
        Undefined,
        Array,
@@ -1478,7 +1473,6 @@ public:
        using Tables = std::array<Table, 2>;

        Flags flags;
-        Flags on_write_stores;
        Tables tables{};
    } dirty;

@@ -1541,7 +1535,7 @@ private:
    void FinishCBData();

    /// Handles a write to the CB_BIND register.
-    void ProcessCBBind(std::size_t stage_index);
+    void ProcessCBBind(size_t stage_index);

    /// Handles a write to the VERTEX_END_GL register, triggering a draw.
    void DrawArrays();
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -60,9 +60,6 @@ void MaxwellDMA::Launch() {
        return;
    }

-    // All copies here update the main memory, so mark all rasterizer states as invalid.
-    system.GPU().Maxwell3D().OnMemoryWrite();
-
    if (is_src_pitch && is_dst_pitch) {
        CopyPitchToPitch();
    } else {
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -143,22 +143,26 @@ private:
    }

    bool ShouldWait() const {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
        return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
               query_cache.ShouldWaitAsyncFlushes();
    }

    bool ShouldFlush() const {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
        return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
               query_cache.HasUncommittedFlushes();
    }

    void PopAsyncFlushes() {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
        texture_cache.PopAsyncFlushes();
        buffer_cache.PopAsyncFlushes();
        query_cache.PopAsyncFlushes();
    }

    void CommitAsyncFlushes() {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
        texture_cache.CommitAsyncFlushes();
        buffer_cache.CommitAsyncFlushes();
        query_cache.CommitAsyncFlushes();
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -44,8 +44,8 @@ GPU::~GPU() = default;

 void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
    renderer = std::move(renderer_);
+    rasterizer = renderer->ReadRasterizer();

-    VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer();
    memory_manager->BindRasterizer(rasterizer);
    maxwell_3d->BindRasterizer(rasterizer);
    fermi_2d->BindRasterizer(rasterizer);
@@ -171,7 +171,7 @@ void GPU::TickWork() {
        const std::size_t size = request.size;
        flush_requests.pop_front();
        flush_request_mutex.unlock();
-        renderer->Rasterizer().FlushRegion(addr, size);
+        rasterizer->FlushRegion(addr, size);
        current_flush_fence.store(fence);
        flush_request_mutex.lock();
    }
@@ -193,11 +193,11 @@ u64 GPU::GetTicks() const {
 }

 void GPU::FlushCommands() {
-    renderer->Rasterizer().FlushCommands();
+    rasterizer->FlushCommands();
 }

 void GPU::SyncGuestHost() {
-    renderer->Rasterizer().SyncGuestHost();
+    rasterizer->SyncGuestHost();
 }

 enum class GpuSemaphoreOperation {
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -366,6 +366,7 @@ protected:
    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
    std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
    std::unique_ptr<VideoCore::RendererBase> renderer;
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
    const bool use_nvdec;

 private:
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -38,6 +38,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
    }

    auto current_context = context.Acquire();
+    VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer();

    CommandDataContainer next;
    while (state.is_running) {
@@ -52,13 +53,13 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
        } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
            renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
        } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
-            renderer.Rasterizer().ReleaseFences();
+            rasterizer->ReleaseFences();
        } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
            system.GPU().TickWork();
        } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
-            renderer.Rasterizer().FlushRegion(flush->addr, flush->size);
+            rasterizer->FlushRegion(flush->addr, flush->size);
        } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
-            renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size);
+            rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
        } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
            return;
        } else {
@@ -84,6 +85,7 @@ ThreadManager::~ThreadManager() {
 void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
                                Core::Frontend::GraphicsContext& context,
                                Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) {
+    rasterizer = renderer.ReadRasterizer();
    thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
                         std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher));
 }
@@ -129,12 +131,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
 }

 void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
-    system.Renderer().Rasterizer().OnCPUWrite(addr, size);
+    rasterizer->OnCPUWrite(addr, size);
 }

 void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
    // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
-    system.Renderer().Rasterizer().OnCPUWrite(addr, size);
+    rasterizer->OnCPUWrite(addr, size);
 }

 void ThreadManager::WaitIdle() const {
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -27,6 +27,7 @@ class System;
 } // namespace Core

 namespace VideoCore {
+class RasterizerInterface;
 class RendererBase;
 } // namespace VideoCore

@@ -151,11 +152,12 @@ private:
    /// Pushes a command to be executed by the GPU thread
    u64 PushCommand(CommandData&& command_data);

-    SynchState state;
    Core::System& system;
-    std::thread thread;
-    std::thread::id thread_id;
    const bool is_async;
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
+
+    SynchState state;
+    std::thread thread;
 };

 } // namespace VideoCommon::GPUThread
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -12,7 +12,6 @@ set(SHADER_FILES
    vulkan_blit_depth_stencil.frag
    vulkan_present.frag
    vulkan_present.vert
-    vulkan_quad_array.comp
    vulkan_quad_indexed.comp
    vulkan_uint8.comp
 )
--- a/src/video_core/host_shaders/vulkan_quad_array.comp
+++ b/src/video_core/host_shaders/vulkan_quad_array.comp
@@ -1,28 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#version 460 core
-
-layout (local_size_x = 1024) in;
-
-layout (std430, set = 0, binding = 0) buffer OutputBuffer {
-    uint output_indexes[];
-};
-
-layout (push_constant) uniform PushConstants {
-    uint first;
-};
-
-void main() {
-    uint primitive = gl_GlobalInvocationID.x;
-    if (primitive * 6 >= output_indexes.length()) {
-        return;
-    }
-
-    const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3);
-    for (uint vertex = 0; vertex < 6; ++vertex) {
-        uint index = first + primitive * 4 + quad_map[vertex];
-        output_indexes[primitive * 6 + vertex] = index;
-    }
-}
--- a/src/video_core/host_shaders/vulkan_uint8.comp
+++ b/src/video_core/host_shaders/vulkan_uint8.comp
@@ -16,9 +16,16 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
    uint16_t output_indexes[];
 };

+uint AssembleIndex(uint id) {
+    // Most primitive restart indices are 0xFF
+    // Hardcode this to 0xFF for now
+    uint index = uint(input_indexes[id]);
+    return index == 0xFF ? 0xFFFF : index;
+}
+
 void main() {
    uint id = gl_GlobalInvocationID.x;
    if (id < input_indexes.length()) {
-        output_indexes[id] = uint16_t(input_indexes[id]);
+        output_indexes[id] = uint16_t(AssembleIndex(id));
    }
 }
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -21,8 +21,8 @@ MemoryManager::MemoryManager(Core::System& system_)

 MemoryManager::~MemoryManager() = default;

-void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
-    rasterizer = &rasterizer_;
+void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_;
 }

 GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -72,7 +72,7 @@ public:
    ~MemoryManager();

    /// Binds a renderer to the memory manager.
-    void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);

    [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;

@@ -157,6 +157,8 @@ private:

    using MapRange = std::pair<GPUVAddr, size_t>;
    std::vector<MapRange> map_ranges;
+
+    std::vector<std::pair<VAddr, std::size_t>> cache_invalidate_queue;
 };

 } // namespace Tegra
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -7,6 +7,7 @@
 #include <atomic>
 #include <functional>
 #include <optional>
+#include <span>
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
@@ -49,6 +50,10 @@ public:
    /// Records a GPU query and caches it
    virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;

+    /// Signal a uniform buffer binding
+    virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+                                           u32 size) = 0;
+
    /// Signal a GPU based semaphore as a fence
    virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;

--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -37,15 +37,11 @@ public:
                          std::unique_ptr<Core::Frontend::GraphicsContext> context);
    virtual ~RendererBase();

-    /// Initialize the renderer
-    [[nodiscard]] virtual bool Init() = 0;
-
-    /// Shutdown the renderer
-    virtual void ShutDown() = 0;
-
    /// Finalize rendering the guest frame and draw into the presentation texture
    virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;

+    [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
+
    // Getter/setter functions:
    // ------------------------

@@ -57,14 +53,6 @@ public:
        return m_current_frame;
    }

-    [[nodiscard]] RasterizerInterface& Rasterizer() {
-        return *rasterizer;
-    }
-
-    [[nodiscard]] const RasterizerInterface& Rasterizer() const {
-        return *rasterizer;
-    }
-
    [[nodiscard]] Core::Frontend::GraphicsContext& Context() {
        return *context;
    }
@@ -98,7 +86,6 @@ public:

 protected:
    Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
-    std::unique_ptr<RasterizerInterface> rasterizer;
    std::unique_ptr<Core::Frontend::GraphicsContext> context;
    f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
    int m_current_frame = 0;  ///< Current frame, should be set by the renderer
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,98 +2,208 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

-#include <memory>
+#include <span>

-#include <glad/glad.h>
-
-#include "common/assert.h"
-#include "common/microprofile.h"
 #include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"

 namespace OpenGL {
+namespace {
+struct BindlessSSBO {
+    GLuint64EXT address;
+    GLsizei length;
+    GLsizei padding;
+};
+static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4);

-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+constexpr std::array PROGRAM_LUT{
+    GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+    GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
+};
+} // Anonymous namespace

-MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
+Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
+    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}

-Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_)
-    : BufferBlock{cpu_addr_, size_} {
-    gl_buffer.Create();
-    glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW);
-    if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) {
-        glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
-        glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
+Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+               VAddr cpu_addr_, u64 size_bytes_)
+    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
+    buffer.Create();
+    const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
+    glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
+    glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
+
+    if (runtime.has_unified_vertex_buffers) {
+        glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
    }
 }

-Buffer::~Buffer() = default;
-
-void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
-    glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset),
-                         static_cast<GLsizeiptr>(data_size), data);
+void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
+    glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
+                         static_cast<GLsizeiptr>(data.size_bytes()), data.data());
 }

-void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
-    MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
-    const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size);
-    const GLintptr gl_offset = static_cast<GLintptr>(offset);
-    if (read_buffer.handle == 0) {
-        read_buffer.Create();
-        glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
-                          GL_STREAM_READ);
-    }
-    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
-    glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
-    glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
+void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept {
+    glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
+                            static_cast<GLsizeiptr>(data.size_bytes()), data.data());
 }

-void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
-                      std::size_t copy_size) {
-    glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
-                             static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size));
-}
-
-OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                               Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                               const Device& device_, OGLStreamBuffer& stream_buffer_,
-                               StateTracker& state_tracker)
-    : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
-    if (!device.HasFastBufferSubData()) {
+void Buffer::MakeResident(GLenum access) noexcept {
+    // Abuse GLenum's order to exit early
+    // GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE
+    if (access <= current_residency_access || buffer.handle == 0) {
        return;
    }
+    if (std::exchange(current_residency_access, access) != GL_NONE) {
+        // If the buffer is already resident, remove its residency before promoting it
+        glMakeNamedBufferNonResidentNV(buffer.handle);
+    }
+    glMakeNamedBufferResidentNV(buffer.handle, access);
+}

-    static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
-    glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
-    for (const GLuint cbuf : cbufs) {
-        glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
+BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
+    : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
+      use_assembly_shaders{device.UseAssemblyShaders()},
+      has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
+      stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
+    GLint gl_max_attributes;
+    glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
+    max_attributes = static_cast<u32>(gl_max_attributes);
+    for (auto& stage_uniforms : fast_uniforms) {
+        for (OGLBuffer& buffer : stage_uniforms) {
+            buffer.Create();
+            glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
+        }
+    }
+    for (auto& stage_uniforms : copy_uniforms) {
+        for (OGLBuffer& buffer : stage_uniforms) {
+            buffer.Create();
+            glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
+        }
+    }
+    for (OGLBuffer& buffer : copy_compute_uniforms) {
+        buffer.Create();
+        glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY);
    }
 }

-OGLBufferCache::~OGLBufferCache() {
-    glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
+void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
+                                    std::span<const VideoCommon::BufferCopy> copies) {
+    for (const VideoCommon::BufferCopy& copy : copies) {
+        glCopyNamedBufferSubData(
+            src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
+            static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
+    }
 }

-std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
-    return std::make_shared<Buffer>(device, cpu_addr, size);
+void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { // Selects `buffer` as the index (element array) source.
+    if (has_unified_vertex_buffers) {
+        buffer.MakeResident(GL_READ_ONLY);
+        glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset, // The range is given as an address, so `offset` is folded in here.
+                               static_cast<GLsizeiptr>(size));
+    } else {
+        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
+        index_buffer_offset = offset; // Kept for IndexOffset(); `size` is unused on this path.
+    }
 }

-OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
-    return {0, 0, 0};
+void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, // Binds `buffer` to vertex buffer binding point `index` with the given stride.
+                                          u32 stride) {
+    if (index >= max_attributes) { // max_attributes is GL_MAX_VERTEX_ATTRIBS (queried in the constructor); higher indices are silently ignored.
+        return;
+    }
+    if (has_unified_vertex_buffers) {
+        buffer.MakeResident(GL_READ_ONLY);
+        glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride)); // Buffer name 0: this call only supplies the stride.
+        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index, // The data range is given by address and length instead.
+                               buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
+    } else {
+        glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset), // `size` is unused on this path.
+                           static_cast<GLsizei>(stride));
+    }
 }

-OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
-                                                             std::size_t size) {
-    DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
-    const GLuint cbuf = cbufs[cbuf_cursor++];
+void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, // Binds `size` bytes of `buffer` starting at `offset` as uniform buffer `binding_index` of graphics `stage`.
+                                           u32 offset, u32 size) {
+    if (use_assembly_shaders) {
+        GLuint handle;
+        if (offset != 0) { // A non-zero offset is first copied to the start of a per-slot scratch buffer, which is bound instead.
+            handle = copy_uniforms[stage][binding_index].handle; // Scratch buffers are allocated with 0x10'000 bytes in the constructor.
+            glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
+        } else {
+            handle = buffer.Handle();
+        }
+        glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, // NOTE(review): always bound from offset 0; presumably an offset restriction of this path - confirm.
+                            static_cast<GLsizeiptr>(size));
+    } else {
+        const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; // Per-stage base is added to the stage-local index.
+        const GLuint binding = base_binding + binding_index;
+        glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
+                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+    }
+}

-    glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
-    return {cbuf, 0, 0};
+void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, // Binds `size` bytes of `buffer` starting at `offset` as compute uniform buffer `binding_index`.
+                                                  u32 size) {
+    if (use_assembly_shaders) {
+        GLuint handle;
+        if (offset != 0) { // A non-zero offset is first copied to the start of a per-slot scratch buffer, which is bound instead.
+            handle = copy_compute_uniforms[binding_index].handle; // Scratch buffers are allocated with 0x10'000 bytes in the constructor.
+            glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size);
+        } else {
+            handle = buffer.Handle();
+        }
+        glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0, // Always bound from offset 0.
+                            static_cast<GLsizeiptr>(size));
+    } else {
+        glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(), // Unlike the graphics variant, no base binding is added here.
+                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+    }
+}
+
+void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, // Exposes `size` bytes of `buffer` at `offset` as storage buffer `binding_index` of graphics `stage`.
+                                           u32 offset, u32 size, bool is_written) {
+    if (use_assembly_shaders) {
+        const BindlessSSBO ssbo{ // Address/length record handed to the program as a local parameter.
+            .address = buffer.HostGpuAddr() + offset,
+            .length = static_cast<GLsizei>(size),
+            .padding = 0,
+        };
+        buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); // `is_written` only affects this path.
+        glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, // Assumes BindlessSSBO is exactly four GLuints wide - confirm at its definition.
+                                        reinterpret_cast<const GLuint*>(&ssbo));
+    } else {
+        const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; // Per-stage base is added to the stage-local index.
+        const GLuint binding = base_binding + binding_index;
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), // NOTE(review): no size == 0 case here, unlike BindComputeStorageBuffer - confirm callers never pass 0.
+                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+    }
+}
+
+void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, // Exposes `size` bytes of `buffer` at `offset` as compute storage buffer `binding_index`.
+                                                  u32 size, bool is_written) {
+    if (use_assembly_shaders) {
+        const BindlessSSBO ssbo{ // Address/length record handed to the program as a local parameter.
+            .address = buffer.HostGpuAddr() + offset,
+            .length = static_cast<GLsizei>(size),
+            .padding = 0,
+        };
+        buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); // `is_written` only affects this path.
+        glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, // Assumes BindlessSSBO is exactly four GLuints wide - confirm at its definition.
+                                        reinterpret_cast<const GLuint*>(&ssbo));
+    } else if (size == 0) { // A zero size unbinds the slot (buffer name 0) instead of binding an empty range.
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
+    } else {
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), // No base binding is added on the compute path.
+                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+    }
+}
+
+void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, // Binds `size` bytes of `buffer` at `offset` to transform feedback binding `index`.
+                                                     u32 size) {
+    glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(), // Same call on every device path; no feature checks here.
+                      static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
 }

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -5,79 +5,157 @@
 #pragma once

 #include <array>
-#include <memory>
+#include <span>

+#include "common/alignment.h"
 #include "common/common_types.h"
+#include "common/dynamic_library.h"
 #include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/engines/maxwell_3d.h"
+#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_stream_buffer.h"

-namespace Core {
-class System;
-}
-
 namespace OpenGL {

-class Device;
-class OGLStreamBuffer;
-class RasterizerOpenGL;
-class StateTracker;
+class BufferCacheRuntime;

-class Buffer : public VideoCommon::BufferBlock {
+class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { // OpenGL buffer cache entry: an OGLBuffer plus the address and residency state declared below.
 public:
-    explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_);
-    ~Buffer();
+    explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr, // Defined out of line.
+                    u64 size_bytes);
+    explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); // Overload selected by the NullBufferParams tag; defined out of line.

-    void Upload(std::size_t offset, std::size_t data_size, const u8* data);
+    void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept; // Defined out of line.

-    void Download(std::size_t offset, std::size_t data_size, u8* data);
+    void ImmediateDownload(size_t offset, std::span<u8> data) noexcept; // Defined out of line.

-    void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
-                  std::size_t copy_size);
+    void MakeResident(GLenum access) noexcept; // BufferCacheRuntime calls this with GL_READ_ONLY or GL_READ_WRITE before using HostGpuAddr().

-    GLuint Handle() const noexcept {
-        return gl_buffer.handle;
+    [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { // Returns the stored `address` member unchanged.
+        return address;
     }

-    u64 Address() const noexcept {
-        return gpu_address;
+    [[nodiscard]] GLuint Handle() const noexcept { // GL object name of the wrapped OGLBuffer.
+        return buffer.handle;
     }

 private:
-    OGLBuffer gl_buffer;
-    OGLBuffer read_buffer;
-    u64 gpu_address = 0;
+    GLuint64EXT address = 0; // Value returned by HostGpuAddr(); zero until assigned elsewhere.
+    OGLBuffer buffer;
+    GLenum current_residency_access = GL_NONE; // Starts as GL_NONE; presumably updated by MakeResident - confirm in the .cpp.
 };

-using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
-class OGLBufferCache final : public GenericBufferCache {
+class BufferCacheRuntime { // OpenGL backend of the buffer cache: owns the scratch uniform buffers and issues the GL bind/copy calls.
+    friend Buffer;
+
 public:
-    explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
-                            Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
-                            const Device& device, OGLStreamBuffer& stream_buffer,
-                            StateTracker& state_tracker);
-    ~OGLBufferCache();
+    static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max(); // NOTE(review): not referenced in this header; presumably a sentinel for callers.

-    BufferInfo GetEmptyBuffer(std::size_t) override;
+    explicit BufferCacheRuntime(const Device& device_); // Caches device capabilities and creates the scratch uniform buffers.

-    void Acquire() noexcept {
-        cbuf_cursor = 0;
+    void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, // Buffer-to-buffer copies, one GL call per entry in `copies`.
+                    std::span<const VideoCommon::BufferCopy> copies);
+
+    void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
+
+    void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
+
+    void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size);
+
+    void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size);
+
+    void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size,
+                           bool is_written);
+
+    void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size,
+                                  bool is_written);
+
+    void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
+
+    void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { // Binds the fast_uniforms buffer of (stage, binding_index), always from offset 0.
+        if (use_assembly_shaders) {
+            const GLuint handle = fast_uniforms[stage][binding_index].handle;
+            const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
+            glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
+        } else {
+            const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; // Per-stage base is added to the stage-local index.
+            const GLuint binding = base_binding + binding_index;
+            glBindBufferRange(GL_UNIFORM_BUFFER, binding,
+                              fast_uniforms[stage][binding_index].handle, 0,
+                              static_cast<GLsizeiptr>(size));
+        }
     }

-protected:
-    std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
+    void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) { // Uploads `data` for the (stage, binding_index) fast uniform slot.
+        if (use_assembly_shaders) {
+            glProgramBufferParametersIuivNV(
+                PABO_LUT[stage], binding_index, 0,
+                static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)), // Count is in GLuint words; a trailing partial word is dropped by the division.
+                reinterpret_cast<const GLuint*>(data.data()));
+        } else {
+            glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0, // Written at offset 0 of the slot's buffer.
+                                 static_cast<GLsizeiptr>(data.size_bytes()), data.data());
+        }
+    }

-    BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
+    std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { // Requests `size` bytes from stream_buffer, binds that range and returns the span from Request().
+        const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); // Unchecked dereference: stream_buffer is empty when has_fast_buffer_sub_data is set (see constructor).
+        const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+        const GLuint binding = base_binding + binding_index;
+        glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
+                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+        return mapped_span;
+    }
+
+    [[nodiscard]] const GLvoid* IndexOffset() const noexcept { // Offset last stored by BindIndexBuffer, encoded as the pointer argument of glDrawElements-style calls.
+        return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset));
+    }
+
+    [[nodiscard]] bool HasFastBufferSubData() const noexcept { // Device capability cached at construction.
+        return has_fast_buffer_sub_data;
+    }

 private:
-    static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
-                                             Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
+    static constexpr std::array PABO_LUT{ // Parameter buffer target per graphics stage, indexed by `stage`.
+        GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV,          GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
+        GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
+        GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
+    };

     const Device& device;

-    std::size_t cbuf_cursor = 0;
-    std::array<GLuint, NUM_CBUFS> cbufs{};
+    bool has_fast_buffer_sub_data = false; // These three flags are overwritten from `device` in the constructor.
+    bool use_assembly_shaders = false;
+    bool has_unified_vertex_buffers = false;
+
+    u32 max_attributes = 0; // GL_MAX_VERTEX_ATTRIBS, queried in the constructor.
+
+    std::optional<StreamBuffer> stream_buffer; // Only engaged when has_fast_buffer_sub_data is false.
+
+    std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, // One buffer per (stage, slot), each created with BufferCache::SKIP_CACHE_SIZE bytes.
+               VideoCommon::NUM_STAGES>
+        fast_uniforms;
+    std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, // 0x10'000-byte scratch per (stage, slot), used by BindUniformBuffer for non-zero offsets.
+               VideoCommon::NUM_STAGES>
+        copy_uniforms;
+    std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms; // Compute counterpart of copy_uniforms.
+
+    u32 index_buffer_offset = 0; // Written by BindIndexBuffer on the non-unified path, read by IndexOffset().
 };

+struct BufferCacheParams { // Compile-time configuration used as the template argument of VideoCommon::BufferCache.
+    using Runtime = OpenGL::BufferCacheRuntime;
+    using Buffer = OpenGL::Buffer;
+
+    static constexpr bool IS_OPENGL = true; // The meaning of each flag is defined by VideoCommon::BufferCache, which is not visible here.
+    static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
+    static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
+    static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
+    static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
+    static constexpr bool USE_MEMORY_MAPS = false;
+};
+
+using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; // The generic buffer cache instantiated with the OpenGL parameters.
+
 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -21,9 +21,7 @@
 #include "video_core/renderer_opengl/gl_resource_manager.h"

 namespace OpenGL {
-
 namespace {
-
 // One uniform block is reserved for emulation purposes
 constexpr u32 ReservedUniformBlocks = 1;

@@ -197,11 +195,13 @@ bool IsASTCSupported() {
    const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
    return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
 }
-
 } // Anonymous namespace

-Device::Device()
-    : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
+Device::Device() {
+    if (!GLAD_GL_VERSION_4_6) {
+        LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available");
+        throw std::runtime_error{"Insufficient version"};
+    }
    const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
    const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
    const std::vector extensions = GetExtensions();
@@ -217,6 +217,9 @@ Device::Device()
            "Beta driver 443.24 is known to have issues. There might be performance issues.");
        disable_fast_buffer_sub_data = true;
    }
+
+    max_uniform_buffers = BuildMaxUniformBuffers();
+    base_bindings = BuildBaseBindings();
    uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
    shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
    max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -10,11 +10,9 @@

 namespace OpenGL {

-static constexpr u32 EmulationUniformBlockBinding = 0;
-
-class Device final {
+class Device {
 public:
-    struct BaseBindings final {
+    struct BaseBindings {
        u32 uniform_buffer{};
        u32 shader_storage_buffer{};
        u32 sampler{};
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -47,7 +47,7 @@ void GLInnerFence::Wait() {

 FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
                                       Tegra::GPU& gpu_, TextureCache& texture_cache_,
-                                       OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
+                                       BufferCache& buffer_cache_, QueryCache& query_cache_) // Every argument is forwarded unchanged to the GenericFenceManager base.
    : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}

 Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -32,14 +32,13 @@ private:
 };

 using Fence = std::shared_ptr<GLInnerFence>;
-using GenericFenceManager =
-    VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
+using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>; // Base class of FenceManagerOpenGL, instantiated with the OpenGL fence and cache types.

 class FenceManagerOpenGL final : public GenericFenceManager {
 public:
-    explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
-                                TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
-                                QueryCache& query_cache_);
+    explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+                                TextureCache& texture_cache, BufferCache& buffer_cache,
+                                QueryCache& query_cache);

 protected:
    Fence CreateFence(u32 value, bool is_stubbed) override;
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -38,34 +38,21 @@
 namespace OpenGL {

 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using GLvec4 = std::array<GLfloat, 4>;

 using Tegra::Engines::ShaderType;
 using VideoCore::Surface::PixelFormat;
 using VideoCore::Surface::SurfaceTarget;
 using VideoCore::Surface::SurfaceType;

-MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192)); // Scoped in RasterizerOpenGL::Clear().
 MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
-MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
+MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); // Display name spelled out in full.

 namespace {

-constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
-constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
-    NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
-constexpr size_t TOTAL_CONST_BUFFER_BYTES =
-    NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
-
 constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
-constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;

 struct TextureHandle {
    constexpr TextureHandle(u32 data, bool via_header_index) {
@@ -101,20 +88,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const
    return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
 }

-std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
-                               const ConstBufferEntry& entry) {
-    if (!entry.IsIndirect()) {
-        return entry.GetSize();
-    }
-    if (buffer.size > Maxwell::MaxConstBufferSize) {
-        LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
-                    Maxwell::MaxConstBufferSize);
-        return Maxwell::MaxConstBufferSize;
-    }
-
-    return buffer.size;
-}
-
 /// Translates hardware transform feedback indices
 /// @param location Hardware location
 /// @return Pair of ARB_transform_feedback3 token stream first and third arguments
@@ -147,14 +120,6 @@ void oglEnable(GLenum cap, bool state) {
    (state ? glEnable : glDisable)(cap);
 }

-void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
-    if (num_ssbos == 0) {
-        return;
-    }
-    glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
-                                    reinterpret_cast<const GLuint*>(ssbos));
-}
-
 ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
    if (entry.is_buffer) {
        return ImageViewType::Buffer;
@@ -201,44 +166,28 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
    : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
      kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
      screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
-      stream_buffer(device, state_tracker),
      texture_cache_runtime(device, program_manager, state_tracker),
      texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
+      buffer_cache_runtime(device),
+      buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
      shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
      query_cache(*this, maxwell3d, gpu_memory),
-      buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
      async_shaders(emu_window_) {
-    unified_uniform_buffer.Create();
-    glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
-
-    if (device.UseAssemblyShaders()) {
-        glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
-        for (const GLuint cbuf : staging_cbufs) {
-            glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
-                                 nullptr, 0);
-        }
-    }
    if (device.UseAsynchronousShaders()) {
        async_shaders.AllocateWorkers();
    }
 }

-RasterizerOpenGL::~RasterizerOpenGL() {
-    if (device.UseAssemblyShaders()) {
-        glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
-    }
-}
+RasterizerOpenGL::~RasterizerOpenGL() = default; // Defaulted: no manual GL cleanup is performed in the destructor.

-void RasterizerOpenGL::SetupVertexFormat() {
+void RasterizerOpenGL::SyncVertexFormats() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::VertexFormats]) {
        return;
    }
    flags[Dirty::VertexFormats] = false;

-    MICROPROFILE_SCOPE(OpenGL_VAO);
-
    // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
    // the first 16 vertex attributes always, as we don't know which ones are actually used until
    // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
@@ -274,55 +223,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
    }
 }

-void RasterizerOpenGL::SetupVertexBuffer() {
-    auto& flags = maxwell3d.dirty.flags;
-    if (!flags[Dirty::VertexBuffers]) {
-        return;
-    }
-    flags[Dirty::VertexBuffers] = false;
-
-    MICROPROFILE_SCOPE(OpenGL_VB);
-
-    const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
-
-    // Upload all guest vertex arrays sequentially to our buffer
-    const auto& regs = maxwell3d.regs;
-    for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
-        if (!flags[Dirty::VertexBuffer0 + index]) {
-            continue;
-        }
-        flags[Dirty::VertexBuffer0 + index] = false;
-
-        const auto& vertex_array = regs.vertex_array[index];
-        if (!vertex_array.IsEnabled()) {
-            continue;
-        }
-
-        const GPUVAddr start = vertex_array.StartAddress();
-        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
-        ASSERT(end >= start);
-
-        const GLuint gl_index = static_cast<GLuint>(index);
-        const u64 size = end - start;
-        if (size == 0) {
-            glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
-            if (use_unified_memory) {
-                glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
-            }
-            continue;
-        }
-        const auto info = buffer_cache.UploadMemory(start, size);
-        if (use_unified_memory) {
-            glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
-            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
-                                   info.address + info.offset, size);
-        } else {
-            glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
-        }
-    }
-}
-
-void RasterizerOpenGL::SetupVertexInstances() {
+void RasterizerOpenGL::SyncVertexInstances() {
    auto& flags = maxwell3d.dirty.flags;
    if (!flags[Dirty::VertexInstances]) {
        return;
@@ -343,17 +244,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
    }
 }

-GLintptr RasterizerOpenGL::SetupIndexBuffer() {
-    MICROPROFILE_SCOPE(OpenGL_Index);
-    const auto& regs = maxwell3d.regs;
-    const std::size_t size = CalculateIndexBufferSize();
-    const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
-    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
-    return info.offset;
-}
-
-void RasterizerOpenGL::SetupShaders() {
-    MICROPROFILE_SCOPE(OpenGL_Shader);
+void RasterizerOpenGL::SetupShaders(bool is_indexed) {
    u32 clip_distances = 0;

    std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
@@ -410,11 +301,19 @@ void RasterizerOpenGL::SetupShaders() {
        const size_t stage = index == 0 ? 0 : index - 1;
        shaders[stage] = shader;

-        SetupDrawConstBuffers(stage, shader);
-        SetupDrawGlobalMemory(stage, shader);
        SetupDrawTextures(shader, stage);
        SetupDrawImages(shader, stage);

+        buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
+
+        buffer_cache.UnbindGraphicsStorageBuffers(stage);
+        u32 ssbo_index = 0;
+        for (const auto& buffer : shader->GetEntries().global_memory_entries) {
+            buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
+                                                   buffer.cbuf_offset, buffer.is_written);
+            ++ssbo_index;
+        }
+
        // Workaround for Intel drivers.
        // When a clip distance is enabled but not set in the shader it crops parts of the screen
        // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -430,43 +329,26 @@ void RasterizerOpenGL::SetupShaders() {
    SyncClipEnabled(clip_distances);
    maxwell3d.dirty.flags[Dirty::Shaders] = false;

+    buffer_cache.UpdateGraphicsBuffers(is_indexed);
+
    const std::span indices_span(image_view_indices.data(), image_view_indices.size());
    texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);

+    buffer_cache.BindHostGeometryBuffers(is_indexed);
+
    size_t image_view_index = 0;
    size_t texture_index = 0;
    size_t image_index = 0;
    for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
        const Shader* const shader = shaders[stage];
-        if (shader) {
-            const auto base = device.GetBaseBindings(stage);
-            BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
-                         texture_index, image_index);
-        }
-    }
-}
-
-std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
-    const auto& regs = maxwell3d.regs;
-
-    std::size_t size = 0;
-    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        if (!regs.vertex_array[index].IsEnabled())
+        if (!shader) {
            continue;
-
-        const GPUVAddr start = regs.vertex_array[index].StartAddress();
-        const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
-
-        size += end - start;
-        ASSERT(end >= start);
+        }
+        buffer_cache.BindHostStageBuffers(stage);
+        const auto& base = device.GetBaseBindings(stage);
+        BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
+                     texture_index, image_index);
    }
-
-    return size;
-}
-
-std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
-    return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
-           static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
 }

 void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
@@ -475,6 +357,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
 }

 void RasterizerOpenGL::Clear() {
+    MICROPROFILE_SCOPE(OpenGL_Clears);
    if (!maxwell3d.ShouldExecute()) {
        return;
    }
@@ -525,11 +408,9 @@ void RasterizerOpenGL::Clear() {
    }
    UNIMPLEMENTED_IF(regs.clear_flags.viewport);

-    {
-        auto lock = texture_cache.AcquireLock();
-        texture_cache.UpdateRenderTargets(true);
-        state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
-    }
+    std::scoped_lock lock{texture_cache.mutex};
+    texture_cache.UpdateRenderTargets(true);
+    state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());

    if (use_color) {
        glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
@@ -541,7 +422,6 @@ void RasterizerOpenGL::Clear() {
    } else if (use_stencil) {
        glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
    }
-
    ++num_queued_commands;
 }

@@ -550,75 +430,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {

    query_cache.UpdateCounters();

-    SyncViewport();
-    SyncRasterizeEnable();
-    SyncPolygonModes();
-    SyncColorMask();
-    SyncFragmentColorClampState();
-    SyncMultiSampleState();
-    SyncDepthTestState();
-    SyncDepthClamp();
-    SyncStencilTestState();
-    SyncBlendState();
-    SyncLogicOpState();
-    SyncCullMode();
-    SyncPrimitiveRestart();
-    SyncScissorTest();
-    SyncPointState();
-    SyncLineState();
-    SyncPolygonOffset();
-    SyncAlphaTest();
-    SyncFramebufferSRGB();
-
-    buffer_cache.Acquire();
-    current_cbuf = 0;
-
-    std::size_t buffer_size = CalculateVertexArraysSize();
-
-    // Add space for index buffer
-    if (is_indexed) {
-        buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
-    }
-
-    // Uniform space for the 5 shader stages
-    buffer_size =
-        Common::AlignUp<std::size_t>(buffer_size, 4) +
-        (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
-
-    // Add space for at least 18 constant buffers
-    buffer_size += Maxwell::MaxConstBuffers *
-                   (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
-
-    // Prepare the vertex array.
-    buffer_cache.Map(buffer_size);
-
-    // Prepare vertex array format.
-    SetupVertexFormat();
-
-    // Upload vertex and index data.
-    SetupVertexBuffer();
-    SetupVertexInstances();
-    GLintptr index_buffer_offset = 0;
-    if (is_indexed) {
-        index_buffer_offset = SetupIndexBuffer();
-    }
-
-    // Setup emulation uniform buffer.
-    if (!device.UseAssemblyShaders()) {
-        MaxwellUniformData ubo;
-        ubo.SetFromRegs(maxwell3d);
-        const auto info =
-            buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
-        glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
-                          static_cast<GLsizeiptr>(sizeof(ubo)));
-    }
+    SyncState();

    // Setup shaders and their used resources.
-    auto lock = texture_cache.AcquireLock();
-    SetupShaders();
+    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+    SetupShaders(is_indexed);

-    // Signal the buffer cache that we are not going to upload more things.
-    buffer_cache.Unmap();
    texture_cache.UpdateRenderTargets(false);
    state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
    program_manager.BindGraphicsPipeline();
@@ -632,7 +449,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
    if (is_indexed) {
        const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
        const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
-        const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
+        const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
        const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
        if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
            glDrawElements(primitive_mode, num_vertices, format, offset);
@@ -672,22 +489,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
 }

 void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
-    buffer_cache.Acquire();
-    current_cbuf = 0;
-
    Shader* const kernel = shader_cache.GetComputeKernel(code_addr);

-    auto lock = texture_cache.AcquireLock();
+    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
    BindComputeTextures(kernel);

-    const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
-                               (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
-    buffer_cache.Map(buffer_size);
-
-    SetupComputeConstBuffers(kernel);
-    SetupComputeGlobalMemory(kernel);
-
-    buffer_cache.Unmap();
+    const auto& entries = kernel->GetEntries();
+    buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
+    buffer_cache.UnbindComputeStorageBuffers();
+    u32 ssbo_index = 0;
+    for (const auto& buffer : entries.global_memory_entries) {
+        buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
+                                              buffer.is_written);
+        ++ssbo_index;
+    }
+    buffer_cache.UpdateComputeBuffers();
+    buffer_cache.BindHostComputeBuffers();

    const auto& launch_desc = kepler_compute.launch_description;
    glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
@@ -703,6 +520,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
    query_cache.Query(gpu_addr, type, timestamp);
 }

+void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
+                                                 u32 size) {
+    std::scoped_lock lock{buffer_cache.mutex};
+    buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
+}
+
 void RasterizerOpenGL::FlushAll() {}

 void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
@@ -711,19 +534,23 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
        return;
    }
    {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.DownloadMemory(addr, size);
    }
-    buffer_cache.FlushRegion(addr, size);
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.DownloadMemory(addr, size);
+    }
    query_cache.FlushRegion(addr, size);
 }

 bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
+    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
    if (!Settings::IsGPULevelHigh()) {
-        return buffer_cache.MustFlushRegion(addr, size);
+        return buffer_cache.IsRegionGpuModified(addr, size);
    }
    return texture_cache.IsRegionGpuModified(addr, size) ||
-           buffer_cache.MustFlushRegion(addr, size);
+           buffer_cache.IsRegionGpuModified(addr, size);
 }

 void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -732,11 +559,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
        return;
    }
    {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.WriteMemory(addr, size);
    }
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.WriteMemory(addr, size);
+    }
    shader_cache.InvalidateRegion(addr, size);
-    buffer_cache.InvalidateRegion(addr, size);
    query_cache.InvalidateRegion(addr, size);
 }

@@ -745,26 +575,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
    if (addr == 0 || size == 0) {
        return;
    }
+    shader_cache.OnCPUWrite(addr, size);
    {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.WriteMemory(addr, size);
    }
-    shader_cache.OnCPUWrite(addr, size);
-    buffer_cache.OnCPUWrite(addr, size);
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.CachedWriteMemory(addr, size);
+    }
 }

 void RasterizerOpenGL::SyncGuestHost() {
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-    buffer_cache.SyncGuestHost();
    shader_cache.SyncGuestHost();
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.FlushCachedWrites();
+    }
 }

 void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
    {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.UnmapMemory(addr, size);
    }
-    buffer_cache.OnCPUWrite(addr, size);
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.WriteMemory(addr, size);
+    }
    shader_cache.OnCPUWrite(addr, size);
 }

@@ -799,14 +638,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
 }

 void RasterizerOpenGL::WaitForIdle() {
-    // Place a barrier on everything that is not framebuffer related.
-    // This is related to another flag that is not currently implemented.
-    glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
-                    GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
-                    GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
-                    GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
-                    GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
-                    GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
+    glMemoryBarrier(GL_ALL_BARRIER_BITS);
 }

 void RasterizerOpenGL::FragmentBarrier() {
@@ -831,18 +663,21 @@ void RasterizerOpenGL::TickFrame() {
    num_queued_commands = 0;

    fence_manager.TickFrame();
-    buffer_cache.TickFrame();
    {
-        auto lock = texture_cache.AcquireLock();
+        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.TickFrame();
    }
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        buffer_cache.TickFrame();
+    }
 }

 bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
                                             const Tegra::Engines::Fermi2D::Surface& dst,
                                             const Tegra::Engines::Fermi2D::Config& copy_config) {
    MICROPROFILE_SCOPE(OpenGL_Blits);
-    auto lock = texture_cache.AcquireLock();
+    std::scoped_lock lock{texture_cache.mutex};
    texture_cache.BlitImage(dst, src, copy_config);
    return true;
 }
@@ -854,7 +689,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
    }
    MICROPROFILE_SCOPE(OpenGL_CacheManagement);

-    auto lock = texture_cache.AcquireLock();
+    std::scoped_lock lock{texture_cache.mutex};
    ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
    if (!image_view) {
        return false;
@@ -921,166 +756,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te
    }
 }

-void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
-    static constexpr std::array PARAMETER_LUT{
-        GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV,          GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
-        GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
-        GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
-    };
-    MICROPROFILE_SCOPE(OpenGL_UBO);
-    const auto& stages = maxwell3d.state.shader_stages;
-    const auto& shader_stage = stages[stage_index];
-    const auto& entries = shader->GetEntries();
-    const bool use_unified = entries.use_unified_uniforms;
-    const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
-
-    const auto base_bindings = device.GetBaseBindings(stage_index);
-    u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
-    for (const auto& entry : entries.const_buffers) {
-        const u32 index = entry.GetIndex();
-        const auto& buffer = shader_stage.const_buffers[index];
-        SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
-                         base_unified_offset + index * Maxwell::MaxConstBufferSize);
-        ++binding;
-    }
-    if (use_unified) {
-        const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
-                                           entries.global_memory_entries.size());
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
-                          base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
-    }
-}
-
-void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
-    MICROPROFILE_SCOPE(OpenGL_UBO);
-    const auto& launch_desc = kepler_compute.launch_description;
-    const auto& entries = kernel->GetEntries();
-    const bool use_unified = entries.use_unified_uniforms;
-
-    u32 binding = 0;
-    for (const auto& entry : entries.const_buffers) {
-        const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
-        const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
-        Tegra::Engines::ConstBufferInfo buffer;
-        buffer.address = config.Address();
-        buffer.size = config.size;
-        buffer.enabled = mask[entry.GetIndex()];
-        SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
-                         use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
-        ++binding;
-    }
-    if (use_unified) {
-        const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
-                          NUM_CONST_BUFFERS_BYTES_PER_STAGE);
-    }
-}
-
-void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
-                                        const Tegra::Engines::ConstBufferInfo& buffer,
-                                        const ConstBufferEntry& entry, bool use_unified,
-                                        std::size_t unified_offset) {
-    if (!buffer.enabled) {
-        // Set values to zero to unbind buffers
-        if (device.UseAssemblyShaders()) {
-            glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
-        } else {
-            glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
-        }
-        return;
-    }
-
-    // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
-    // UBO alignment requirements.
-    const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
-
-    const bool fast_upload = !use_unified && device.HasFastBufferSubData();
-
-    const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
-    const GPUVAddr gpu_addr = buffer.address;
-    auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
-
-    if (device.UseAssemblyShaders()) {
-        UNIMPLEMENTED_IF(use_unified);
-        if (info.offset != 0) {
-            const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
-            glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
-            info.handle = staging_cbuf;
-            info.offset = 0;
-        }
-        glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
-        return;
-    }
-
-    if (use_unified) {
-        glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
-                                 unified_offset, size);
-    } else {
-        glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
-    }
-}
-
-void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
-    static constexpr std::array TARGET_LUT = {
-        GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
-        GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
-    };
-    const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
-    const auto& entries{shader->GetEntries().global_memory_entries};
-
-    std::array<BindlessSSBO, 32> ssbos;
-    ASSERT(entries.size() < ssbos.size());
-
-    const bool assembly_shaders = device.UseAssemblyShaders();
-    u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
-    for (const auto& entry : entries) {
-        const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
-        const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
-        const u32 size{gpu_memory.Read<u32>(addr + 8)};
-        SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
-        ++binding;
-    }
-    if (assembly_shaders) {
-        UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
-    }
-}
-
-void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
-    const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
-    const auto& entries{kernel->GetEntries().global_memory_entries};
-
-    std::array<BindlessSSBO, 32> ssbos;
-    ASSERT(entries.size() < ssbos.size());
-
-    u32 binding = 0;
-    for (const auto& entry : entries) {
-        const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
-        const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
-        const u32 size{gpu_memory.Read<u32>(addr + 8)};
-        SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
-        ++binding;
-    }
-    if (device.UseAssemblyShaders()) {
-        UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
-    }
-}
-
-void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
-                                         GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
-    const size_t alignment{device.GetShaderStorageBufferAlignment()};
-    const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
-    if (device.UseAssemblyShaders()) {
-        *ssbo = BindlessSSBO{
-            .address = static_cast<GLuint64EXT>(info.address + info.offset),
-            .length = static_cast<GLsizei>(size),
-            .padding = 0,
-        };
-    } else {
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
-                          static_cast<GLsizeiptr>(size));
-    }
-}
-
 void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
    const bool via_header_index =
        maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
@@ -1128,6 +803,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
    }
 }

+void RasterizerOpenGL::SyncState() {
+    SyncViewport();
+    SyncRasterizeEnable();
+    SyncPolygonModes();
+    SyncColorMask();
+    SyncFragmentColorClampState();
+    SyncMultiSampleState();
+    SyncDepthTestState();
+    SyncDepthClamp();
+    SyncStencilTestState();
+    SyncBlendState();
+    SyncLogicOpState();
+    SyncCullMode();
+    SyncPrimitiveRestart();
+    SyncScissorTest();
+    SyncPointState();
+    SyncLineState();
+    SyncPolygonOffset();
+    SyncAlphaTest();
+    SyncFramebufferSRGB();
+    SyncVertexFormats();
+    SyncVertexInstances();
+}
+
 void RasterizerOpenGL::SyncViewport() {
    auto& flags = maxwell3d.dirty.flags;
    const auto& regs = maxwell3d.regs;
@@ -1163,9 +862,11 @@ void RasterizerOpenGL::SyncViewport() {
        if (regs.screen_y_control.y_negate != 0) {
            flip_y = !flip_y;
        }
-        glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
-                      regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
-                                                                       : GL_NEGATIVE_ONE_TO_ONE);
+        const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne;
+        const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
+        const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE;
+        state_tracker.ClipControl(origin, depth);
+        state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0);
    }

    if (dirty_viewport) {
@@ -1649,36 +1350,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
    if (regs.tfb_enabled == 0) {
        return;
    }
-
    if (device.UseAssemblyShaders()) {
        SyncTransformFeedback();
    }
-
    UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
                     regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
                     regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
-
-    for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
-        const auto& binding = regs.tfb_bindings[index];
-        if (!binding.buffer_enable) {
-            if (enabled_transform_feedback_buffers[index]) {
-                glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
-                                  0);
-            }
-            enabled_transform_feedback_buffers[index] = false;
-            continue;
-        }
-        enabled_transform_feedback_buffers[index] = true;
-
-        auto& tfb_buffer = transform_feedback_buffers[index];
-        tfb_buffer.Create();
-
-        const GLuint handle = tfb_buffer.handle;
-        const std::size_t size = binding.buffer_size;
-        glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
-        glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
-                          static_cast<GLsizeiptr>(size));
-    }
+    UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);

    // We may have to call BeginTransformFeedbackNV here since they seem to call different
    // implementations on Nvidia's driver (the pointer is different) but we are using
@@ -1692,23 +1370,7 @@ void RasterizerOpenGL::EndTransformFeedback() {
    if (regs.tfb_enabled == 0) {
        return;
    }
-
    glEndTransformFeedback();
-
-    for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
-        const auto& binding = regs.tfb_bindings[index];
-        if (!binding.buffer_enable) {
-            continue;
-        }
-        UNIMPLEMENTED_IF(binding.buffer_offset != 0);
-
-        const GLuint handle = transform_feedback_buffers[index].handle;
-        const GPUVAddr gpu_addr = binding.Address();
-        const std::size_t size = binding.buffer_size;
-        const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
-        glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
-                                 static_cast<GLsizeiptr>(size));
-    }
 }

 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -30,7 +30,6 @@
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
-#include "video_core/renderer_opengl/gl_stream_buffer.h"
 #include "video_core/renderer_opengl/gl_texture_cache.h"
 #include "video_core/shader/async_shaders.h"
 #include "video_core/textures/texture.h"
@@ -72,6 +71,7 @@ public:
    void DispatchCompute(GPUVAddr code_addr) override;
    void ResetCounter(VideoCore::QueryType type) override;
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
+    void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
    void FlushAll() override;
    void FlushRegion(VAddr addr, u64 size) override;
    bool MustFlushRegion(VAddr addr, u64 size) override;
@@ -119,27 +119,6 @@ private:
    void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
                      size_t& image_view_index, size_t& texture_index, size_t& image_index);

-    /// Configures the current constbuffers to use for the draw command.
-    void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
-
-    /// Configures the current constbuffers to use for the kernel invocation.
-    void SetupComputeConstBuffers(Shader* kernel);
-
-    /// Configures a constant buffer.
-    void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
-                          const ConstBufferEntry& entry, bool use_unified,
-                          std::size_t unified_offset);
-
-    /// Configures the current global memory entries to use for the draw command.
-    void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
-
-    /// Configures the current global memory entries to use for the kernel invocation.
-    void SetupComputeGlobalMemory(Shader* kernel);
-
-    /// Configures a global memory buffer.
-    void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
-                           size_t size, BindlessSSBO* ssbo);
-
    /// Configures the current textures to use for the draw command.
    void SetupDrawTextures(const Shader* shader, size_t stage_index);

@@ -152,6 +131,9 @@ private:
    /// Configures images in a compute shader.
    void SetupComputeImages(const Shader* shader);

+    /// Syncs state to match the guest state
+    void SyncState();
+
    /// Syncs the viewport and depth range to match the guest state
    void SyncViewport();

@@ -215,6 +197,12 @@ private:
    /// Syncs the framebuffer sRGB state to match the guest state
    void SyncFramebufferSRGB();

+    /// Syncs vertex formats to match the guest state
+    void SyncVertexFormats();
+
+    /// Syncs vertex instances to match the guest state
+    void SyncVertexInstances();
+
    /// Syncs transform feedback state to match guest state
    /// @note Only valid on assembly shaders
    void SyncTransformFeedback();
@@ -225,19 +213,7 @@ private:
    /// End a transform feedback
    void EndTransformFeedback();

-    std::size_t CalculateVertexArraysSize() const;
-
-    std::size_t CalculateIndexBufferSize() const;
-
-    /// Updates the current vertex format
-    void SetupVertexFormat();
-
-    void SetupVertexBuffer();
-    void SetupVertexInstances();
-
-    GLintptr SetupIndexBuffer();
-
-    void SetupShaders();
+    void SetupShaders(bool is_indexed);

    Tegra::GPU& gpu;
    Tegra::Engines::Maxwell3D& maxwell3d;
@@ -249,12 +225,12 @@ private:
    ProgramManager& program_manager;
    StateTracker& state_tracker;

-    OGLStreamBuffer stream_buffer;
    TextureCacheRuntime texture_cache_runtime;
    TextureCache texture_cache;
+    BufferCacheRuntime buffer_cache_runtime;
+    BufferCache buffer_cache;
    ShaderCacheOpenGL shader_cache;
    QueryCache query_cache;
-    OGLBufferCache buffer_cache;
    FenceManagerOpenGL fence_manager;

    VideoCommon::Shader::AsyncShaders async_shaders;
@@ -262,20 +238,8 @@ private:
    boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
    std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
    boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
-    std::array<GLuint, MAX_TEXTURES> texture_handles;
-    std::array<GLuint, MAX_IMAGES> image_handles;
-
-    std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
-        transform_feedback_buffers;
-    std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
-        enabled_transform_feedback_buffers;
-
-    static constexpr std::size_t NUM_CONSTANT_BUFFERS =
-        Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
-        Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
-    std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
-    std::size_t current_cbuf = 0;
-    OGLBuffer unified_uniform_buffer;
+    std::array<GLuint, MAX_TEXTURES> texture_handles{};
+    std::array<GLuint, MAX_IMAGES> image_handles{};

    /// Number of commands queued to the OpenGL driver. Resetted on flush.
    std::size_t num_queued_commands = 0;
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
lat9nq	0c24ae300c	externals: Update dynarmic to latest Updates dynarmic to its latest commit. Includes a fix for argument limits while compiling with Clang 12.	2021-02-18 14:44:49 -05:00
bunnei	6be0975bf2	Merge pull request #5121 from bunnei/optimize-core-timing core: Optimize core timing utility functions to avoid unnecessary math	2021-02-16 13:17:22 -08:00
Morph	723e038dba	Merge pull request #5929 from german77/mousePanning Improve mouse panning	2021-02-16 22:52:35 +08:00
bunnei	aaccb21f81	Merge pull request #4298 from FearlessTobi/remove-cache-setting yuzu/configure_filesystem: Remove "Select Cache Directory" option	2021-02-15 20:31:16 -08:00
LC	df1a9d09a9	Merge pull request #5942 from ReinUsesLisp/fixup-rebase vk_rasterizer: Fix loading shader addresses twice	2021-02-15 19:35:56 -05:00
ReinUsesLisp	24d0cc3ab8	vk_rasterizer: Fix loading shader addresses twice This was recently introduced on a wrongly rebased commit.	2021-02-15 21:34:13 -03:00
bunnei	86212d4bcd	Merge pull request #3603 from FearlessTobi/port-5123 Port citra-emu/citra#5123: "SDL: Disable hidapi drivers due to compatibility problems with certain controllers"	2021-02-15 16:25:53 -08:00
bunnei	f3345e84ad	core: core_timing_util: Optimize core timing math. - Avoids a lot of unnecessary 128-bit math for imperceptible accuracy.	2021-02-15 14:54:06 -08:00
bunnei	592a649918	common: wall_clock: Optimize GetClockCycles/GetCPUCycles to use a single MUL instruction.	2021-02-15 14:51:43 -08:00
bunnei	0a91599aec	common: Merge uint128 to a single header file with inlines.	2021-02-15 14:46:04 -08:00
bunnei	cffa6f4e62	Merge pull request #5923 from ReinUsesLisp/vk-dirty-pipeline fixed_pipeline_cache: Use dirty flags to lazily update key	2021-02-15 13:17:27 -08:00
LC	ed543c4d5c	Merge pull request #5939 from Morph1984/web_types core/CMakeLists: Add web_types.h	2021-02-15 14:02:10 -05:00
bunnei	b53b50adec	Merge pull request #4940 from german77/nativeGC HID: Implement GC controller in game	2021-02-15 10:32:19 -08:00
Morph	48cfc47050	core/CMakeLists: Add web_types.h	2021-02-15 09:40:30 -05:00
bunnei	90610bde9b	Merge pull request #5935 from lat9nq/controller_access_keys debugger: controller: Add access key	2021-02-14 22:33:59 -08:00
bunnei	8378b8a61f	Merge pull request #5909 from ogniK5377/I3dl2Reverb audren: Implement I3dl2Reverb	2021-02-14 20:09:15 -08:00
lat9nq	6269cd7f1d	debugger: controller: Add access key Adds the access key to the Controller P1 selection at View -> Debugger -> Controller P1. Avoids using the windowTitle as that would add a literal & to the beginning of the window title.	2021-02-14 16:10:12 -05:00
bunnei	b0a3915351	Merge pull request #5920 from bunnei/am-ldn-fix Fix LDN Initialization return code & resulting AM overflow	2021-02-14 02:46:01 -08:00
bunnei	eae9f2e440	yuzu: Various frontend improvements to avoid crashes and improve experience on Linux.	2021-02-14 00:20:41 -08:00
bunnei	d9a8060ce3	hle: service: ldn: IUserLocalCommunicationService: Improve the stub.	2021-02-13 21:45:09 -08:00
german	594973bdd2	Improve mouse panning	2021-02-13 22:23:32 -06:00
bunnei	51c13606d6	hle: service: ldn: IUserLocalCommunicationService: Indicate that LDN is disabled. - Fixes crash on Pokemon Sword/Shield when pressing 'Y'.	2021-02-13 20:11:26 -08:00
bunnei	d25011c92f	hle: service: am: IStorageAccessor: Fix out of bounds error handling.	2021-02-13 20:11:26 -08:00
LC	0bbf5e61f1	Merge pull request #5925 from ReinUsesLisp/resource-pool-clean vk_resource_pool: Load GPU tick once and compare with it	2021-02-13 20:57:44 -05:00
ReinUsesLisp	b8ffdbb167	vk_resource_pool: Load GPU tick once and compare with it Other minor style improvements. Rename free_iterator to hint_iterator, to describe better what it does.	2021-02-13 17:53:58 -03:00
ReinUsesLisp	70353649d7	fixed_pipeline_cache: Use dirty flags to lazily update key Use dirty flags to avoid building pipeline key from scratch on each draw call. This saves a bit of unnecessary work on each draw call.	2021-02-13 17:44:47 -03:00
Rodrigo Locatti	95722823b9	Merge pull request #5921 from ameerj/srgb-views gl_texture_cache: Lazily create non-sRGB texture views for sRGB formats	2021-02-13 16:51:53 -03:00
ameerj	c7325c6a4c	gl_texture_cache: Lazily create non-sRGB texture views for sRGB formats This creates non-sRGB texture views for sRGB texture formats to allow for interfacing with these views in compute shaders using imageLoad and imageStore. Co-Authored-By: Rodrigo Locatti <reinuseslisp@airmail.cc>	2021-02-13 13:27:50 -05:00
Morph	83227ad981	Merge pull request #5919 from ReinUsesLisp/stream-buffer-tragic gl_stream_buffer/vk_staging_buffer_pool: Fix size check	2021-02-13 21:25:45 +08:00
ReinUsesLisp	dd9caf9aa0	vk_master_semaphore: Mark gpu_tick atomic operations with relaxed order	2021-02-13 05:57:28 -03:00
ReinUsesLisp	6171566296	vk_staging_buffer_pool: Inline tick tests Load the current tick to a local variable, moving it out of an atomic and allowing us to compare the value without going through a pointer each time. This should make the loop more optimizable.	2021-02-13 05:14:11 -03:00
ReinUsesLisp	682d82faf3	gl_stream_buffer/vk_staging_buffer_pool: Fix size check Fix a tragic off-by-one condition that causes Vulkan's stream buffer to think it's always full, using fallback memory. The OpenGL was also affected by this bug to a lesser extent.	2021-02-13 05:11:48 -03:00
LC	710aa22f7c	Merge pull request #5915 from lat9nq/screenshots-dir-fix yuzu: Create screenshot path before capture	2021-02-13 02:56:23 -05:00
LC	6f1ad6aa9f	Merge pull request #5916 from ameerj/maxwell-gl-unused maxwell_to_gl: Remove unused code	2021-02-13 02:55:59 -05:00
LC	06e3d3a658	Merge pull request #5917 from ReinUsesLisp/require-robustness2 vulkan_device: Require VK_EXT_robustness2	2021-02-13 02:55:31 -05:00
ReinUsesLisp	757fd1e917	vulkan_device: Require VK_EXT_robustness2 We are already using robustness2 features without requiring it explicitly, causing potential crashes on drivers without the extension. Requiring this at boot allows better diagnostics for it and formalizes our usage on the extension.	2021-02-13 03:31:50 -03:00
bunnei	d3c7a7e7cf	Merge pull request #5741 from ReinUsesLisp/new-bufcache video_core: Reimplement the buffer cache	2021-02-12 22:22:18 -08:00
ReinUsesLisp	13becdf18a	config: Make high GPU accuracy the default This is a better default for most games, yielding better performance and less graphical issues.	2021-02-13 02:38:05 -03:00
ReinUsesLisp	5b35b01070	video_core: Fix clang build issues	2021-02-13 02:26:47 -03:00
ReinUsesLisp	025fe458ae	vk_staging_buffer_pool: Fix softlock when stream buffer overflows There was still a code path that could wait on a timeline semaphore tick that would never be signalled. While we are at it, make use of more STL algorithms.	2021-02-13 02:18:38 -03:00
ReinUsesLisp	3a2eefb16c	vk_buffer_cache: Add support for null index buffers Games can bind a null index buffer (size=0) where all indices are evaluated as zero. VK_EXT_robustness2 doesn't support this and all drivers segfault when a null index buffer is passed to vkCmdBindIndexBuffer. Workaround this by creating a 4 byte buffer and filling it with zeroes. If it's read out of bounds, robustness takes care of returning zeroes as indices.	2021-02-13 02:18:38 -03:00
ReinUsesLisp	0b8b961442	buffer_cache: Add extra bytes to guest SSBOs Bind extra bytes beyond the guest API's bound range. This is due to some games like Astral Chain operating out of bounds. Binding the whole map range would be technically correct, but games have large maps that make this approach unaffordable for now.	2021-02-13 02:18:38 -03:00
ReinUsesLisp	93a69b6cc8	Merge branch 'bytes-to-map-end' into new-bufcache-wip	2021-02-13 02:18:35 -03:00
ReinUsesLisp	7402442442	vk_staging_buffer_pool: Get a staging buffer instead of waiting Avoids waiting idle while the GPU finishes to do work, and fixes an issue where we'd wait forever if a single command buffer (logic tick) all the data.	2021-02-13 02:18:05 -03:00
ReinUsesLisp	75fd3f95a3	yuzu/config: Disable assembly shaders by default Due to BindBufferRangeNV limitations and poor quality code emission from our side, assembly shaders are currently slower than GLSL. Their build time and feature advantages are still relevant, but they are outweighed by their runtime performance.	2021-02-13 02:18:05 -03:00
ReinUsesLisp	0b631f22fc	renderer_opengl: Remove interop Remove unused interop code from the OpenGL backend.	2021-02-13 02:18:04 -03:00
ReinUsesLisp	3da87d3f12	gl_buffer_cache: Drop interop based parameter buffer workarounds Sacrifice runtime performance to avoid generating kernel exceptions on Windows due to our abusive aliasing of interop buffer objects.	2021-02-13 02:17:24 -03:00
ReinUsesLisp	2b95c137ff	buffer_cache: Heuristically detect stream buffers Detect when a memory region has been joined several times and increase the size of the created buffer on those instances. The buffer is assumed to be a "stream buffer", increasing its size should stop us from constantly recreating it and fragmenting memory.	2021-02-13 02:17:24 -03:00
ReinUsesLisp	ec9354d6d9	buffer_cache: Split CreateBuffer in separate functions Allow adding functionality to each function without making CreateBuffer more complex.	2021-02-13 02:17:24 -03:00
ReinUsesLisp	a02b4e1df6	buffer_cache: Skip cache on small uploads on Vulkan Ports from OpenGL the optimization to skip small 3D uniform buffer uploads. This will take advantage of the previously introduced stream buffer. Fixes instances where the staging buffer offset was being ignored.	2021-02-13 02:17:24 -03:00
ReinUsesLisp	35df1d1864	vk_staging_buffer_pool: Add stream buffer for small uploads This uses a ring buffer similar to OpenGL's stream buffer for small uploads. This stops us from allocating several small buffers, reducing memory fragmentation and cache locality. It uses dedicated allocations when possible.	2021-02-13 02:17:24 -03:00
ReinUsesLisp	8fd518ec40	vulkan_device: Enable robustBufferAccess Fix regression on Pascal on Animal Crossing: New Horizons, fixing a validation error.	2021-02-13 02:17:23 -03:00
ReinUsesLisp	82c2601555	video_core: Reimplement the buffer cache Reimplement the buffer cache using cached bindings and page level granularity for modification tracking. This also drops the usage of shared pointers and virtual functions from the cache. - Bindings are cached, allowing to skip work when the game changes few bits between draws. - OpenGL Assembly shaders no longer copy when a region has been modified from the GPU to emulate constant buffers, instead GL_EXT_memory_object is used to alias sub-buffers within the same allocation. - OpenGL Assembly shaders stream constant buffer data using glProgramBufferParametersIuivNV, from NV_parameter_buffer_object. In theory this should save one hash table resolve inside the driver compared to glBufferSubData. - A new OpenGL stream buffer is implemented based on fences for drivers that are not Nvidia's proprietary, due to their low performance on partial glBufferSubData calls synchronized with 3D rendering (that some games use a lot). - Most optimizations are shared between APIs now, allowing Vulkan to cache more bindings than before, skipping unnecessary work. This commit adds the necessary infrastructure to use Vulkan objects from OpenGL. Overall, it improves performance and fixes some bugs present on the old cache. There are still some edge cases hit by some games that harm performance on some vendors, these are planned to be fixed in later commits.	2021-02-13 02:17:22 -03:00
ReinUsesLisp	a39d9c5194	vulkan_common: Expose interop and headless devices	2021-02-13 02:16:21 -03:00
ReinUsesLisp	47d5ec6cfc	vulkan_common: Make interop extensions mandatory	2021-02-13 02:16:21 -03:00
ReinUsesLisp	40ed0cb920	vulkan_device: Enable robust buffers	2021-02-13 02:16:21 -03:00
ReinUsesLisp	1a987054c5	vulkan_device: Use designated initializers for features	2021-02-13 02:16:21 -03:00
ReinUsesLisp	79afdeaf08	vulkan_wrapper: Add memory barrier pipeline barrier helper	2021-02-13 02:16:21 -03:00
ReinUsesLisp	004a8d6a7a	vulkan_device: Fix formatting of constants	2021-02-13 02:16:21 -03:00
ReinUsesLisp	16f97ded21	vulkan_wrapper: Add interop functions	2021-02-13 02:16:21 -03:00
ReinUsesLisp	9735c34f5d	vulkan_instance: Initialize Vulkan instance in a separate thread Workaround an issue on Nvidia where creating a Vulkan instance from an active OpenGL thread disables threaded optimization on the driver. This optimization is important to have good performance on Nvidia OpenGL.	2021-02-13 02:16:21 -03:00
ReinUsesLisp	dde19e7d75	vulkan_wrapper: Pull Windows symbols	2021-02-13 02:16:21 -03:00
ReinUsesLisp	75ccd9959c	gpu: Report renderer errors with exceptions Instead of using a two step initialization to report errors, initialize the GPU renderer and rasterizer on the constructor and report errors through std::runtime_error.	2021-02-13 02:16:19 -03:00
ReinUsesLisp	19156292a3	tests/buffer_base: Add cached CPU writes tests Ensure the behavior of the previous commit in tests.	2021-02-13 02:15:29 -03:00
ReinUsesLisp	9d8ca6cc4a	buffer_base: Add support for cached CPU writes Some games usually write memory pages currently used by the GPU, causing rendering issues (e.g. flashing geometry and shadows on Link's Awakening). To workaround this issue, Guest CPU writes are delayed until the command buffer finishes processing, but the pages are updated immediately. The overall behavior is: - CPU writes are cached until they are flushed, they update the page state, but don't change the modification state. Cached writes stop pages from being flushed, in case games have meaningful data in it. - Command processing writes (e.g. push constants) update the page state and are marked to the command processor as dirty. They don't remove the state of cached writes.	2021-02-13 02:15:29 -03:00
ameerj	069afcc633	maxwell_to_gl: Remove unused code Removes unused declarations in maxwell_to_gl.h	2021-02-12 23:01:09 -05:00
Chloe Marcec	7ad63ea542	revert to std::sin and std::cos	2021-02-12 18:48:10 -08:00
Chloe Marcec	d28b942458	address issues	2021-02-12 18:48:10 -08:00
Chloe Marcec	4a7fd91857	audren: Implement I3dl2Reverb Most notable fix is the voices in Fire Emblem Three Houses	2021-02-12 18:48:10 -08:00
bunnei	c86d770af9	Merge pull request #5877 from ameerj/res-limit-usage kernel: More accurately utilize resource_limit	2021-02-12 18:21:30 -08:00
ameerj	ec9b6641b1	kernel: More accurately reserve and release resources	2021-02-12 19:05:24 -05:00
ameerj	5fa6b15215	kernel: KScopedReservation implementation This implements KScopedReservation, allowing resource limit reservations to be more HW accurate, and release upon failure without requiring too many conditionals.	2021-02-12 18:57:34 -05:00
Chloe	37939482fb	kernel: Unify result codes (#5890) * kernel: Unify result codes Drop the usage of ERR_NAME convention in kernel for ResultName. Removed separation between svc_results.h & errors.h as we mainly include both most of the time anyways. * oops * rename errors to svc_results	2021-02-12 15:43:01 -08:00
lat9nq	dcc0617cc2	yuzu: Create screenshot path before capture Allows screenshots in cases where the screenshots path doesn't already exist.	2021-02-12 17:26:01 -05:00
bunnei	a0379c2db5	Merge pull request #5902 from lioncash/core-warn core: Silence various warnings on Clang 12	2021-02-11 18:57:23 -08:00
bunnei	e53b6ecc76	Merge pull request #5869 from german77/mousePanning input_common: Add mouse panning	2021-02-11 09:58:23 -08:00
bunnei	f06c3f4907	Merge pull request #5908 from Morph1984/swkbd-finalize software_keyboard: Implement Finalize request command	2021-02-10 21:49:53 -08:00
Lioncash	0cd40fb523	bsd: Remove usage of optional emplace() with no arguments Clang 12 currently falls over in the face of this.	2021-02-09 17:50:29 -05:00
Lioncash	1dab8acf5f	am/controller: Remove [[fallthrough]] from unreachable path Prevents warnings on clang 12. This path is reachable on other variations of the build that disable the unreachable macro.	2021-02-09 17:44:14 -05:00
Lioncash	d64ba58759	nfp: Correct uninitialized size being used within GetTagInfo() We were previously using the name of the object being initialized within its own initializer, which results in uninitialized data being read.	2021-02-09 17:42:02 -05:00
german	bcd4e4f650	Use GC image	2021-02-09 08:12:21 -06:00
german	a994a40467	hid: Implement GC controller	2021-02-07 22:59:46 -06:00
german	52b79ac009	Add mouse panning	2021-02-07 20:31:58 -06:00
FearlessTobi	8e77d331be	yuzu/configure_filesystem: Remove "Select Cache Directory" option This tab of the settings is already extremely bloated and the setting itself is quite useless. With a gamelist of almost 30 games, the cache directory is smaller than 1MB for me and therefore I don't see why it needs to be configurable.	2021-01-04 06:29:48 +01:00
Vitor Kiguchi	e6f9fe1f60	sdl_joystick: disable the use of the hidapi drivers due to many problems caused by them. The main problem is the loss of compatibility with some controllers, but there are also unwanted changes to the behaviour of PS4 controllers (hardcoded lightbar color).	2020-08-30 05:06:49 +02:00