kernel: match calls to Register and Unregister

Merge pull request #10082 from FernandoS27/the-testers-really-love-chocolate
Refactor Accelerate DMA and do downloads through TC.
2023-04-29 21:52:26 -04:00 · 2023-04-29 11:46:01 -07:00 · 2023-04-29 15:31:38 +02:00 · 2023-04-29 00:18:21 +02:00 · 2023-04-29 00:18:21 +02:00 · 2023-04-29 00:18:21 +02:00
35 changed files with 555 additions and 174 deletions
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -293,6 +293,7 @@ struct System::Impl {
        ASSERT(Kernel::KProcess::Initialize(main_process, system, "main",
                                            Kernel::KProcess::ProcessType::Userland, resource_limit)
                   .IsSuccess());
+        Kernel::KProcess::Register(system.Kernel(), main_process);
        kernel.MakeApplicationProcess(main_process);
        const auto [load_result, load_parameters] = app_loader->Load(*main_process, system);
        if (load_result != Loader::ResultStatus::Success) {
--- a/src/core/hid/emulated_controller.cpp
+++ b/src/core/hid/emulated_controller.cpp
@@ -280,6 +280,10 @@ void EmulatedController::LoadVirtualGamepadParams() {
    virtual_stick_params[Settings::NativeAnalog::LStick].Set("axis_y", 1);
    virtual_stick_params[Settings::NativeAnalog::RStick].Set("axis_x", 2);
    virtual_stick_params[Settings::NativeAnalog::RStick].Set("axis_y", 3);
+    virtual_stick_params[Settings::NativeAnalog::LStick].Set("deadzone", 0.0f);
+    virtual_stick_params[Settings::NativeAnalog::LStick].Set("range", 1.0f);
+    virtual_stick_params[Settings::NativeAnalog::RStick].Set("deadzone", 0.0f);
+    virtual_stick_params[Settings::NativeAnalog::RStick].Set("range", 1.0f);
 }

 void EmulatedController::ReloadInput() {
--- a/src/core/hle/kernel/k_auto_object.h
+++ b/src/core/hle/kernel/k_auto_object.h
@@ -182,8 +182,8 @@ public:
    explicit KAutoObjectWithList(KernelCore& kernel) : KAutoObject(kernel) {}

    static int Compare(const KAutoObjectWithList& lhs, const KAutoObjectWithList& rhs) {
-        const u64 lid = lhs.GetId();
-        const u64 rid = rhs.GetId();
+        const uintptr_t lid = reinterpret_cast<uintptr_t>(std::addressof(lhs));
+        const uintptr_t rid = reinterpret_cast<uintptr_t>(std::addressof(rhs));

        if (lid < rid) {
            return -1;
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -95,7 +95,7 @@ struct KernelCore::Impl {
                                       pt_heap_region.GetSize());
        }

-        InitializeHackSharedMemory();
+        InitializeHackSharedMemory(kernel);
        RegisterHostThread(nullptr);
    }

@@ -216,10 +216,12 @@ struct KernelCore::Impl {
            auto* main_thread{Kernel::KThread::Create(system.Kernel())};
            main_thread->SetCurrentCore(core);
            ASSERT(Kernel::KThread::InitializeMainThread(system, main_thread, core).IsSuccess());
+            KThread::Register(system.Kernel(), main_thread);

            auto* idle_thread{Kernel::KThread::Create(system.Kernel())};
            idle_thread->SetCurrentCore(core);
            ASSERT(Kernel::KThread::InitializeIdleThread(system, idle_thread, core).IsSuccess());
+            KThread::Register(system.Kernel(), idle_thread);

            schedulers[i]->Initialize(main_thread, idle_thread, core);
        }
@@ -230,6 +232,7 @@ struct KernelCore::Impl {
                                       const Core::Timing::CoreTiming& core_timing) {
        system_resource_limit = KResourceLimit::Create(system.Kernel());
        system_resource_limit->Initialize(&core_timing);
+        KResourceLimit::Register(kernel, system_resource_limit);

        const auto sizes{memory_layout->GetTotalAndKernelMemorySizes()};
        const auto total_size{sizes.first};
@@ -355,6 +358,7 @@ struct KernelCore::Impl {
            ASSERT(KThread::InitializeHighPriorityThread(system, shutdown_threads[core_id], {}, {},
                                                         core_id)
                       .IsSuccess());
+            KThread::Register(system.Kernel(), shutdown_threads[core_id]);
        }
    }

@@ -729,7 +733,7 @@ struct KernelCore::Impl {
        memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize());
    }

-    void InitializeHackSharedMemory() {
+    void InitializeHackSharedMemory(KernelCore& kernel) {
        // Setup memory regions for emulated processes
        // TODO(bunnei): These should not be hardcoded regions initialized within the kernel
        constexpr std::size_t hid_size{0x40000};
@@ -746,14 +750,23 @@ struct KernelCore::Impl {

        hid_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
                                   Svc::MemoryPermission::Read, hid_size);
+        KSharedMemory::Register(kernel, hid_shared_mem);
+
        font_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
                                    Svc::MemoryPermission::Read, font_size);
+        KSharedMemory::Register(kernel, font_shared_mem);
+
        irs_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
                                   Svc::MemoryPermission::Read, irs_size);
+        KSharedMemory::Register(kernel, irs_shared_mem);
+
        time_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
                                    Svc::MemoryPermission::Read, time_size);
+        KSharedMemory::Register(kernel, time_shared_mem);
+
        hidbus_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
                                      Svc::MemoryPermission::Read, hidbus_size);
+        KSharedMemory::Register(kernel, hidbus_shared_mem);
    }

    std::mutex registered_objects_lock;
@@ -1072,12 +1085,15 @@ static std::jthread RunHostThreadFunc(KernelCore& kernel, KProcess* process,
    // Commit the thread reservation.
    thread_reservation.Commit();

+    // Register the thread.
+    KThread::Register(kernel, thread);
+
    return std::jthread(
        [&kernel, thread, thread_name{std::move(thread_name)}, func{std::move(func)}] {
            // Set the thread name.
            Common::SetCurrentThreadName(thread_name.c_str());

-            // Register the thread.
+            // Set the thread as current.
            kernel.RegisterHostThread(thread);

            // Run the callback.
@@ -1099,6 +1115,9 @@ std::jthread KernelCore::RunOnHostCoreProcess(std::string&& process_name,
    // Ensure that we don't hold onto any extra references.
    SCOPE_EXIT({ process->Close(); });

+    // Register the new process.
+    KProcess::Register(*this, process);
+
    // Run the host thread.
    return RunHostThreadFunc(*this, process, std::move(process_name), std::move(func));
 }
@@ -1124,6 +1143,9 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function
    // Ensure that we don't hold onto any extra references.
    SCOPE_EXIT({ process->Close(); });

+    // Register the new process.
+    KProcess::Register(*this, process);
+
    // Reserve a new thread from the process resource limit.
    KScopedResourceReservation thread_reservation(process, LimitableResource::ThreadCountMax);
    ASSERT(thread_reservation.Succeeded());
@@ -1136,6 +1158,9 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function
    // Commit the thread reservation.
    thread_reservation.Commit();

+    // Register the new thread.
+    KThread::Register(*this, thread);
+
    // Begin running the thread.
    ASSERT(R_SUCCEEDED(thread->Run()));
 }
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -1807,7 +1807,7 @@ void IApplicationFunctions::GetFriendInvitationStorageChannelEvent(HLERequestCon
 }

 void IApplicationFunctions::TryPopFromFriendInvitationStorageChannel(HLERequestContext& ctx) {
-    LOG_WARNING(Service_AM, "(STUBBED) called");
+    LOG_DEBUG(Service_AM, "(STUBBED) called");

    IPC::ResponseBuilder rb{ctx, 2};
    rb.Push(AM::ResultNoDataInChannel);
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -49,12 +49,6 @@ public:
        };
        // clang-format on
        RegisterHandlers(functions);
-
-        if (impl->GetSystem()
-                .Initialize(device_name, in_params, handle, applet_resource_user_id)
-                .IsError()) {
-            LOG_ERROR(Service_Audio, "Failed to initialize the AudioOut System!");
-        }
    }

    ~IAudioOut() override {
@@ -287,6 +281,14 @@ void AudOutU::OpenAudioOut(HLERequestContext& ctx) {

    auto audio_out = std::make_shared<IAudioOut>(system, *impl, new_session_id, device_name,
                                                 in_params, handle, applet_resource_user_id);
+    result = audio_out->GetImpl()->GetSystem().Initialize(device_name, in_params, handle,
+                                                          applet_resource_user_id);
+    if (result.IsError()) {
+        LOG_ERROR(Service_Audio, "Failed to initialize the AudioOut System!");
+        IPC::ResponseBuilder rb{ctx, 2};
+        rb.Push(result);
+        return;
+    }

    impl->sessions[new_session_id] = audio_out->GetImpl();
    impl->applet_resource_user_ids[new_session_id] = applet_resource_user_id;
--- a/src/core/hle/service/ipc_helpers.h
+++ b/src/core/hle/service/ipc_helpers.h
@@ -156,6 +156,7 @@ public:

            auto* session = Kernel::KSession::Create(kernel);
            session->Initialize(nullptr, 0);
+            Kernel::KSession::Register(kernel, session);

            auto next_manager = std::make_shared<Service::SessionRequestManager>(
                kernel, manager->GetServerManager());
--- a/src/core/hle/service/kernel_helpers.cpp
+++ b/src/core/hle/service/kernel_helpers.cpp
@@ -25,6 +25,9 @@ ServiceContext::ServiceContext(Core::System& system_, std::string name_)
                                        Kernel::KProcess::ProcessType::KernelInternal,
                                        kernel.GetSystemResourceLimit())
               .IsSuccess());
+
+    // Register the process.
+    Kernel::KProcess::Register(kernel, process);
    process_created = true;
 }

--- a/src/core/hle/service/mutex.cpp
+++ b/src/core/hle/service/mutex.cpp
@@ -12,6 +12,9 @@ Mutex::Mutex(Core::System& system) : m_system(system) {
    m_event = Kernel::KEvent::Create(system.Kernel());
    m_event->Initialize(nullptr);

+    // Register the event.
+    Kernel::KEvent::Register(system.Kernel(), m_event);
+
    ASSERT(R_SUCCEEDED(m_event->Signal()));
 }

--- a/src/core/hle/service/server_manager.cpp
+++ b/src/core/hle/service/server_manager.cpp
@@ -33,6 +33,9 @@ ServerManager::ServerManager(Core::System& system) : m_system{system}, m_serve_m
    // Initialize event.
    m_event = Kernel::KEvent::Create(system.Kernel());
    m_event->Initialize(nullptr);
+
+    // Register event.
+    Kernel::KEvent::Register(system.Kernel(), m_event);
 }

 ServerManager::~ServerManager() {
@@ -160,6 +163,9 @@ Result ServerManager::ManageDeferral(Kernel::KEvent** out_event) {
    // Initialize the event.
    m_deferral_event->Initialize(nullptr);

+    // Register the event.
+    Kernel::KEvent::Register(m_system.Kernel(), m_deferral_event);
+
    // Set the output.
    *out_event = m_deferral_event;

--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -64,6 +64,9 @@ Result ServiceManager::RegisterService(std::string name, u32 max_sessions,
    auto* port = Kernel::KPort::Create(kernel);
    port->Initialize(ServerSessionCountMax, false, 0);

+    // Register the port.
+    Kernel::KPort::Register(kernel, port);
+
    service_ports.emplace(name, port);
    registered_services.emplace(name, handler);
    if (deferral_event) {
--- a/src/core/hle/service/sm/sm_controller.cpp
+++ b/src/core/hle/service/sm/sm_controller.cpp
@@ -49,6 +49,9 @@ void Controller::CloneCurrentObject(HLERequestContext& ctx) {
    // Commit the session reservation.
    session_reservation.Commit();

+    // Register the session.
+    Kernel::KSession::Register(system.Kernel(), session);
+
    // Register with server manager.
    session_manager->GetServerManager().RegisterSession(&session->GetServerSession(),
                                                        session_manager);
--- a/src/dedicated_room/yuzu_room.cpp
+++ b/src/dedicated_room/yuzu_room.cpp
@@ -49,6 +49,7 @@ static void PrintHelp(const char* argv0) {
             " [options] <filename>\n"
             "--room-name         The name of the room\n"
             "--room-description  The room description\n"
+             "--bind-address      The bind address for the room\n"
             "--port              The port used for the room\n"
             "--max_members       The maximum number of players for this room\n"
             "--password          The password for the room\n"
@@ -195,6 +196,7 @@ int main(int argc, char** argv) {
    std::string web_api_url;
    std::string ban_list_file;
    std::string log_file = "yuzu-room.log";
+    std::string bind_address;
    u64 preferred_game_id = 0;
    u32 port = Network::DefaultRoomPort;
    u32 max_members = 16;
@@ -203,6 +205,7 @@ int main(int argc, char** argv) {
    static struct option long_options[] = {
        {"room-name", required_argument, 0, 'n'},
        {"room-description", required_argument, 0, 'd'},
+        {"bind-address", required_argument, 0, 's'},
        {"port", required_argument, 0, 'p'},
        {"max_members", required_argument, 0, 'm'},
        {"password", required_argument, 0, 'w'},
@@ -222,7 +225,8 @@ int main(int argc, char** argv) {
    InitializeLogging(log_file);

    while (optind < argc) {
-        int arg = getopt_long(argc, argv, "n:d:p:m:w:g:u:t:a:i:l:hv", long_options, &option_index);
+        int arg =
+            getopt_long(argc, argv, "n:d:s:p:m:w:g:u:t:a:i:l:hv", long_options, &option_index);
        if (arg != -1) {
            switch (static_cast<char>(arg)) {
            case 'n':
@@ -231,6 +235,9 @@ int main(int argc, char** argv) {
            case 'd':
                room_description.assign(optarg);
                break;
+            case 's':
+                bind_address.assign(optarg);
+                break;
            case 'p':
                port = strtoul(optarg, &endarg, 0);
                break;
@@ -295,6 +302,9 @@ int main(int argc, char** argv) {
        PrintHelp(argv[0]);
        return -1;
    }
+    if (bind_address.empty()) {
+        LOG_INFO(Network, "Bind address is empty: defaulting to 0.0.0.0");
+    }
    if (port > UINT16_MAX) {
        LOG_ERROR(Network, "Port needs to be in the range 0 - 65535!");
        PrintHelp(argv[0]);
@@ -358,8 +368,8 @@ int main(int argc, char** argv) {
    if (auto room = network.GetRoom().lock()) {
        AnnounceMultiplayerRoom::GameInfo preferred_game_info{.name = preferred_game,
                                                              .id = preferred_game_id};
-        if (!room->Create(room_name, room_description, "", port, password, max_members, username,
-                          preferred_game_info, std::move(verify_backend), ban_list,
+        if (!room->Create(room_name, room_description, bind_address, port, password, max_members,
+                          username, preferred_game_info, std::move(verify_backend), ban_list,
                          enable_yuzu_mods)) {
            LOG_INFO(Network, "Failed to create room: ");
            return -1;
--- a/src/input_common/drivers/mouse.cpp
+++ b/src/input_common/drivers/mouse.cpp
@@ -135,7 +135,7 @@ void Mouse::Move(int x, int y, int center_x, int center_y) {

        auto mouse_change =
            (Common::MakeVec(x, y) - Common::MakeVec(center_x, center_y)).Cast<float>();
-        last_motion_change += {-mouse_change.y, -mouse_change.x, last_motion_change.z};
+        last_motion_change += {-mouse_change.y, -mouse_change.x, 0};

        const auto move_distance = mouse_change.Length();
        if (move_distance == 0) {
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -143,6 +143,21 @@ IR::Inst* PrepareSparse(IR::Inst& inst) {
    }
    return sparse_inst;
 }
+
+// Returns GLSL coordinates with x/y advanced by 1/512 of a texel where the type allows it.
+std::string ImageGatherSubpixelOffset(const IR::TextureInstInfo& info, std::string_view texture,
+                                      std::string_view coords) {
+    if (info.type == TextureType::Color2D || info.type == TextureType::Color2DRect) {
+        return fmt::format("{}+vec2(0.001953125)/vec2(textureSize({}, 0))", coords, texture);
+    }
+    if (info.type == TextureType::ColorArray2D || info.type == TextureType::ColorCube) {
+        // Three-component coordinates: only .xy is offset, .z is passed through.
+        return fmt::format("vec3({0}.xy+vec2(0.001953125)/vec2(textureSize({1}, 0)),{0}.z)", coords,
+                           texture);
+    }
+    // Every other texture type keeps its coordinates as they are.
+    return std::string{coords};
+}
 } // Anonymous namespace

 void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
@@ -340,6 +355,13 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
        LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
        ctx.AddU1("{}=true;", *sparse_inst);
    }
+    std::string coords_with_subpixel_offset;
+    if (ctx.profile.need_gather_subpixel_offset) {
+        // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on
+        // AMD hardware as on Maxwell or other Nvidia architectures.
+        coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords);
+        coords = coords_with_subpixel_offset;
+    }
    if (!sparse_inst || !supports_sparse) {
        if (offset.IsEmpty()) {
            ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords,
@@ -387,6 +409,13 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
        LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
        ctx.AddU1("{}=true;", *sparse_inst);
    }
+    std::string coords_with_subpixel_offset;
+    if (ctx.profile.need_gather_subpixel_offset) {
+        // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on
+        // AMD hardware as on Maxwell or other Nvidia architectures.
+        coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords);
+        coords = coords_with_subpixel_offset;
+    }
    if (!sparse_inst || !supports_sparse) {
        if (offset.IsEmpty()) {
            ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref);
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -261,6 +261,30 @@ Id BitTest(EmitContext& ctx, Id mask, Id bit) {
    const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))};
    return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value);
 }
+
+Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info, Id texture,
+                             Id coords) {
+    // Offset x/y by 1/512 of the texel size of the texture so that gathers round the same way
+    // on AMD hardware as on Maxwell or other Nvidia architectures.
+    const auto calculate_coords{[&](size_t dim) { // dim: component count of coords (2 or 3)
+        const Id nudge{ctx.Const(0x1p-9f)}; // 2^-9 == 1/512
+        const Id image_size{ctx.OpImageQuerySizeLod(ctx.U32[dim], texture, ctx.u32_zero_value)};
+        Id offset{dim == 2 ? ctx.ConstantComposite(ctx.F32[dim], nudge, nudge)
+                           : ctx.ConstantComposite(ctx.F32[dim], nudge, nudge, ctx.f32_zero_value)};
+        offset = ctx.OpFDiv(ctx.F32[dim], offset, ctx.OpConvertUToF(ctx.F32[dim], image_size));
+        return ctx.OpFAdd(ctx.F32[dim], coords, offset);
+    }};
+    switch (info.type) {
+    case TextureType::Color2D:
+    case TextureType::Color2DRect:
+        return calculate_coords(2); // two-component coordinates
+    case TextureType::ColorArray2D:
+    case TextureType::ColorCube:
+        return calculate_coords(3); // three components; the third starts from a zero nudge
+    default:
+        return coords; // other texture types are returned untouched
+    }
+}
 } // Anonymous namespace

 Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
@@ -423,6 +447,9 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
                   const IR::Value& offset, const IR::Value& offset2) {
    const auto info{inst->Flags<IR::TextureInstInfo>()};
    const ImageOperands operands(ctx, offset, offset2);
+    if (ctx.profile.need_gather_subpixel_offset) {
+        coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
+    }
    return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
                ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component),
                operands.MaskOptional(), operands.Span());
@@ -432,6 +459,9 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
                       const IR::Value& offset, const IR::Value& offset2, Id dref) {
    const auto info{inst->Flags<IR::TextureInstInfo>()};
    const ImageOperands operands(ctx, offset, offset2);
+    if (ctx.profile.need_gather_subpixel_offset) {
+        coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
+    }
    return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
                ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(),
                operands.Span());
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -52,6 +52,10 @@ struct Profile {
    bool need_declared_frag_colors{};
    /// Prevents fast math optimizations that may cause inaccuracies
    bool need_fastmath_off{};
+    /// Some GPU vendors round texture pixel coordinates in the 16.8 format differently from
+    /// the Maxwell architecture when executing the ImageGather instruction. Applying a
+    /// subpixel offset to the coordinates compensates for this rounding mismatch.
+    bool need_gather_subpixel_offset{};

    /// OpFClamp is broken and OpFMax + OpFMin should be used instead
    bool has_broken_spirv_clamp{};
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -77,6 +77,14 @@ void Fermi2D::Blit() {
    const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
    const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 &&
                                 src.format != regs.dst.format;
+
+    auto srcX = args.src_x0;
+    auto srcY = args.src_y0;
+    if (args.sample_mode.origin == Origin::Corner) {
+        srcX -= (args.du_dx >> 33) << 32;
+        srcY -= (args.dv_dy >> 33) << 32;
+    }
+
    Config config{
        .operation = regs.operation,
        .filter = args.sample_mode.filter,
@@ -86,10 +94,10 @@ void Fermi2D::Blit() {
        .dst_y0 = args.dst_y0,
        .dst_x1 = args.dst_x0 + args.dst_width,
        .dst_y1 = args.dst_y0 + args.dst_height,
-        .src_x0 = static_cast<s32>(args.src_x0 >> 32),
-        .src_y0 = static_cast<s32>(args.src_y0 >> 32),
-        .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
-        .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
+        .src_x0 = static_cast<s32>(srcX >> 32),
+        .src_y0 = static_cast<s32>(srcY >> 32),
+        .src_x1 = static_cast<s32>((srcX + args.du_dx * args.dst_width) >> 32),
+        .src_y1 = static_cast<s32>((srcY + args.dv_dy * args.dst_height) >> 32),
    };

    const auto need_align_to_pitch =
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -4,6 +4,7 @@
 #include <cstring>
 #include <optional>
 #include "common/assert.h"
+#include "common/bit_util.h"
 #include "common/scope_exit.h"
 #include "common/settings.h"
 #include "core/core.h"
@@ -222,6 +223,9 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
 }

 void Maxwell3D::RefreshParametersImpl() {
+    if (!Settings::IsGPULevelHigh()) {
+        return;
+    }
    size_t current_index = 0;
    for (auto& segment : macro_segments) {
        if (segment.first == 0) {
@@ -259,12 +263,13 @@ u32 Maxwell3D::GetMaxCurrentVertices() {
 size_t Maxwell3D::EstimateIndexBufferSize() {
    GPUVAddr start_address = regs.index_buffer.StartAddress();
    GPUVAddr end_address = regs.index_buffer.EndAddress();
-    static constexpr std::array<size_t, 4> max_sizes = {
-        std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(),
-        std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
+    static constexpr std::array<size_t, 3> max_sizes = {std::numeric_limits<u8>::max(),
+                                                        std::numeric_limits<u16>::max(),
+                                                        std::numeric_limits<u32>::max()};
    const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
+    const size_t log2_byte_size = Common::Log2Ceil64(byte_size);
    return std::min<size_t>(
-        memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) /
+        memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[log2_byte_size]) /
            byte_size,
        static_cast<size_t>(end_address - start_address));
 }
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -176,6 +176,10 @@ public:
        return vendor_name == "ATI Technologies Inc.";
    }

+    bool IsIntel() const {
+        return vendor_name == "Intel"; // exact string match against vendor_name
+    }
+
    bool CanReportMemoryUsage() const {
        return can_report_memory;
    }
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1287,8 +1287,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
    }
    const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
    static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
-    const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing
-                                         : VideoCommon::ObtainBufferOperation::MarkAsWritten;
+    const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
    const auto [buffer, offset] =
        buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);

@@ -1299,7 +1298,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
    if constexpr (IS_IMAGE_UPLOAD) {
        image->UploadMemory(buffer->Handle(), offset, copy_span);
    } else {
-        image->DownloadMemory(buffer->Handle(), offset, copy_span);
+        texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
+                                              buffer_operand.address, buffer_size);
    }
    return true;
 }
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -218,6 +218,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
          .lower_left_origin_mode = true,
          .need_declared_frag_colors = true,
          .need_fastmath_off = device.NeedsFastmathOff(),
+          .need_gather_subpixel_offset = device.IsAmd() || device.IsIntel(),

          .has_broken_spirv_clamp = true,
          .has_broken_unsigned_image_offsets = true,
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -803,30 +803,40 @@ void Image::UploadMemory(const ImageBufferMap& map,

 void Image::DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
                           std::span<const VideoCommon::BufferImageCopy> copies) {
+    std::array buffer_handles{buffer_handle};
+    std::array buffer_offsets{buffer_offset};
+    DownloadMemory(buffer_handles, buffer_offsets, copies);
+}
+
+void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> buffer_offsets,
+                           std::span<const VideoCommon::BufferImageCopy> copies) {
    const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
    if (is_rescaled) {
        ScaleDown();
    }
    glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
-    glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle);
-    glPixelStorei(GL_PACK_ALIGNMENT, 1);
+    for (size_t i = 0; i < buffer_handles.size(); i++) {
+        auto& buffer_handle = buffer_handles[i];
+        glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle);
+        glPixelStorei(GL_PACK_ALIGNMENT, 1);

-    u32 current_row_length = std::numeric_limits<u32>::max();
-    u32 current_image_height = std::numeric_limits<u32>::max();
+        u32 current_row_length = std::numeric_limits<u32>::max();
+        u32 current_image_height = std::numeric_limits<u32>::max();

-    for (const VideoCommon::BufferImageCopy& copy : copies) {
-        if (copy.image_subresource.base_level >= gl_num_levels) {
-            continue;
+        for (const VideoCommon::BufferImageCopy& copy : copies) {
+            if (copy.image_subresource.base_level >= gl_num_levels) {
+                continue;
+            }
+            if (current_row_length != copy.buffer_row_length) {
+                current_row_length = copy.buffer_row_length;
+                glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
+            }
+            if (current_image_height != copy.buffer_image_height) {
+                current_image_height = copy.buffer_image_height;
+                glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
+            }
+            CopyImageToBuffer(copy, buffer_offsets[i]);
        }
-        if (current_row_length != copy.buffer_row_length) {
-            current_row_length = copy.buffer_row_length;
-            glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
-        }
-        if (current_image_height != copy.buffer_image_height) {
-            current_image_height = copy.buffer_image_height;
-            glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
-        }
-        CopyImageToBuffer(copy, buffer_offset);
    }
    if (is_rescaled) {
        ScaleUp(true);
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -215,6 +215,9 @@ public:
    void DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
                        std::span<const VideoCommon::BufferImageCopy> copies);

+    void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset,
+                        std::span<const VideoCommon::BufferImageCopy> copies);
+
    void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);

    GLuint StorageHandle() noexcept;
@@ -376,6 +379,7 @@ struct TextureCacheParams {
    using Sampler = OpenGL::Sampler;
    using Framebuffer = OpenGL::Framebuffer;
    using AsyncBuffer = u32;
+    using BufferType = GLuint;
 };

 using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -10,7 +10,14 @@

 namespace Vulkan {

-MasterSemaphore::MasterSemaphore(const Device& device) {
+MasterSemaphore::MasterSemaphore(const Device& device_) : device(device_) {
+    if (!device.HasTimelineSemaphore()) {
+        static constexpr VkFenceCreateInfo fence_ci{
+            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0};
+        fence = device.GetLogical().CreateFence(fence_ci);
+        return;
+    }
+
    static constexpr VkSemaphoreTypeCreateInfo semaphore_type_ci{
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
        .pNext = nullptr,
@@ -42,4 +49,134 @@ MasterSemaphore::MasterSemaphore(const Device& device) {

 MasterSemaphore::~MasterSemaphore() = default;

+void MasterSemaphore::Refresh() {
+    // Raise the cached GPU tick to the timeline semaphore's counter; it is never lowered.
+    if (!semaphore) {
+        return; // Fence fallback: there is no timeline counter to poll
+    }
+    for (;;) {
+        u64 known_tick = gpu_tick.load(std::memory_order_acquire);
+        const u64 gpu_counter = semaphore.GetCounter();
+        if (gpu_counter < known_tick) {
+            return;
+        }
+        if (gpu_tick.compare_exchange_weak(known_tick, gpu_counter, std::memory_order_release,
+                                           std::memory_order_relaxed)) {
+            return;
+        }
+    }
+}
+
+void MasterSemaphore::Wait(u64 tick) {
+    if (!semaphore) {
+        // Fence fallback (no timeline semaphore): block on the atomic tick until it reaches
+        // `tick`. The load is an acquire, like the other tick reads in this class, so it
+        // synchronizes with the store that SubmitQueueFence performs after its fence wait.
+        while (true) {
+            const u64 current_value = gpu_tick.load(std::memory_order_acquire);
+            if (current_value >= tick) {
+                return;
+            }
+            gpu_tick.wait(current_value);
+        }
+    }
+
+    // No need to wait if the GPU is ahead of the tick
+    if (IsFree(tick)) {
+        return;
+    }
+
+    // Update the GPU tick and try again
+    Refresh();
+
+    if (IsFree(tick)) {
+        return;
+    }
+
+    // If none of the above is hit, fallback to a regular wait
+    while (!semaphore.Wait(tick)) {
+    }
+
+    Refresh();
+}
+
+VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+                                      VkSemaphore wait_semaphore, u64 host_tick) {
+    // A valid timeline semaphore selects the timeline path; otherwise the fence path is used.
+    if (!semaphore) {
+        return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
+    }
+    return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
+}
+
+static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
+    VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,            // Stage mask for the first wait semaphore
+    VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, // Stage mask for the second wait semaphore
+};
+
+VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
+                                              VkSemaphore signal_semaphore,
+                                              VkSemaphore wait_semaphore, u64 host_tick) {
+    const VkSemaphore timeline_semaphore = *semaphore;
+    // Slot 0 of each array is the timeline semaphore; slot 1 is the optional caller-supplied one.
+    const u32 num_signal_semaphores = signal_semaphore ? 2 : 1;
+    const std::array signal_values{host_tick, u64(0)};
+    const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
+    // The timeline is waited at host_tick - 1 and signalled at host_tick by this submission.
+    const u32 num_wait_semaphores = wait_semaphore ? 2 : 1;
+    const std::array wait_values{host_tick - 1, u64(1)};
+    const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
+    // The per-semaphore values are chained into the submit info through pNext.
+    const VkTimelineSemaphoreSubmitInfo timeline_si{
+        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
+        .pNext = nullptr,
+        .waitSemaphoreValueCount = num_wait_semaphores,
+        .pWaitSemaphoreValues = wait_values.data(),
+        .signalSemaphoreValueCount = num_signal_semaphores,
+        .pSignalSemaphoreValues = signal_values.data(),
+    };
+    const VkSubmitInfo submit_info{
+        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+        .pNext = &timeline_si,
+        .waitSemaphoreCount = num_wait_semaphores,
+        .pWaitSemaphores = wait_semaphores.data(),
+        .pWaitDstStageMask = wait_stage_masks.data(),
+        .commandBufferCount = 1,
+        .pCommandBuffers = cmdbuf.address(),
+        .signalSemaphoreCount = num_signal_semaphores,
+        .pSignalSemaphores = signal_semaphores.data(),
+    };
+
+    return device.GetGraphicsQueue().Submit(submit_info);
+}
+
+VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+                                           VkSemaphore wait_semaphore, u64 host_tick) {
+    const u32 num_signal_semaphores = signal_semaphore ? 1 : 0;
+    const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
+    // Each optional semaphore is passed by address; a null handle gives a count of zero.
+    const VkSubmitInfo submit_info{
+        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+        .pNext = nullptr,
+        .waitSemaphoreCount = num_wait_semaphores,
+        .pWaitSemaphores = &wait_semaphore,
+        .pWaitDstStageMask = wait_stage_masks.data(),
+        .commandBufferCount = 1,
+        .pCommandBuffers = cmdbuf.address(),
+        .signalSemaphoreCount = num_signal_semaphores,
+        .pSignalSemaphores = &signal_semaphore,
+    };
+    // Submit with the fence attached so completion can be waited for right below.
+    auto result = device.GetGraphicsQueue().Submit(submit_info, *fence);
+    // On success: wait on the fence, reset it, then publish host_tick and wake waiting threads.
+    if (result == VK_SUCCESS) {
+        fence.Wait();
+        fence.Reset();
+        gpu_tick.store(host_tick);
+        gpu_tick.notify_all();
+    }
+
+    return result;
+}
+
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -4,6 +4,8 @@
 #pragma once

 #include <atomic>
+#include <condition_variable>
+#include <mutex>
 #include <thread>

 #include "common/common_types.h"
@@ -29,11 +31,6 @@ public:
        return gpu_tick.load(std::memory_order_acquire);
    }

-    /// Returns the timeline semaphore handle.
-    [[nodiscard]] VkSemaphore Handle() const noexcept {
-        return *semaphore;
-    }
-
    /// Returns true when a tick has been hit by the GPU.
    [[nodiscard]] bool IsFree(u64 tick) const noexcept {
        return KnownGpuTick() >= tick;
@@ -45,37 +42,24 @@ public:
    }

    /// Refresh the known GPU tick
-    void Refresh() {
-        u64 this_tick{};
-        u64 counter{};
-        do {
-            this_tick = gpu_tick.load(std::memory_order_acquire);
-            counter = semaphore.GetCounter();
-            if (counter < this_tick) {
-                return;
-            }
-        } while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
-                                                 std::memory_order_relaxed));
-    }
+    void Refresh();

    /// Waits for a tick to be hit on the GPU
-    void Wait(u64 tick) {
-        // No need to wait if the GPU is ahead of the tick
-        if (IsFree(tick)) {
-            return;
-        }
-        // Update the GPU tick and try again
-        Refresh();
-        if (IsFree(tick)) {
-            return;
-        }
-        // If none of the above is hit, fallback to a regular wait
-        while (!semaphore.Wait(tick)) {
-        }
-        Refresh();
-    }
+    void Wait(u64 tick);
+
+    /// Submits the device graphics queue, updating the tick as necessary
+    VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+                         VkSemaphore wait_semaphore, u64 host_tick);

 private:
+    VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+                                 VkSemaphore wait_semaphore, u64 host_tick);
+    VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
+                              VkSemaphore wait_semaphore, u64 host_tick);
+
+private:
+    const Device& device;             ///< Device.
+    vk::Fence fence;                  ///< Fence.
    vk::Semaphore semaphore;          ///< Timeline semaphore.
    std::atomic<u64> gpu_tick{0};     ///< Current known GPU tick.
    std::atomic<u64> current_tick{1}; ///< Current logical tick.
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -329,6 +329,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device

        .lower_left_origin_mode = false,
        .need_declared_frag_colors = false,
+        .need_gather_subpixel_offset = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY ||
+                                       driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
+                                       driver_id == VK_DRIVER_ID_MESA_RADV ||
+                                       driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS ||
+                                       driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA,

        .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS,
        .has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -781,8 +781,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
    }
    const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
    static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
-    const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing
-                                         : VideoCommon::ObtainBufferOperation::MarkAsWritten;
+    const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
    const auto [buffer, offset] =
        buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);

@@ -793,7 +792,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
    if constexpr (IS_IMAGE_UPLOAD) {
        image->UploadMemory(buffer->Handle(), offset, copy_span);
    } else {
-        image->DownloadMemory(buffer->Handle(), offset, copy_span);
+        texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
+                                              buffer_operand.address, buffer_size);
    }
    return true;
 }
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -212,45 +212,13 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
    const u64 signal_value = master_semaphore->NextTick();
    Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
        cmdbuf.End();
-        const VkSemaphore timeline_semaphore = master_semaphore->Handle();
-
-        const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
-        const std::array signal_values{signal_value, u64(0)};
-        const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
-
-        const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
-        const std::array wait_values{signal_value - 1, u64(1)};
-        const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
-        static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
-            VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
-            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
-        };
-
-        const VkTimelineSemaphoreSubmitInfo timeline_si{
-            .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
-            .pNext = nullptr,
-            .waitSemaphoreValueCount = num_wait_semaphores,
-            .pWaitSemaphoreValues = wait_values.data(),
-            .signalSemaphoreValueCount = num_signal_semaphores,
-            .pSignalSemaphoreValues = signal_values.data(),
-        };
-        const VkSubmitInfo submit_info{
-            .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
-            .pNext = &timeline_si,
-            .waitSemaphoreCount = num_wait_semaphores,
-            .pWaitSemaphores = wait_semaphores.data(),
-            .pWaitDstStageMask = wait_stage_masks.data(),
-            .commandBufferCount = 1,
-            .pCommandBuffers = cmdbuf.address(),
-            .signalSemaphoreCount = num_signal_semaphores,
-            .pSignalSemaphores = signal_semaphores.data(),
-        };

        if (on_submit) {
            on_submit();
        }

-        switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
+        switch (const VkResult result = master_semaphore->SubmitQueue(
+                    cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
        case VK_SUCCESS:
            break;
        case VK_ERROR_DEVICE_LOST:
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -65,6 +65,18 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
    return extent;
 }

+VkCompositeAlphaFlagBitsKHR ChooseAlphaFlags(const VkSurfaceCapabilitiesKHR& capabilities) {
+    const auto supported = capabilities.supportedCompositeAlpha;
+    if (supported & VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR) {
+        return VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; // First choice
+    }
+    if (supported & VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) {
+        return VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR; // Second choice
+    }
+    LOG_ERROR(Render_Vulkan, "Unknown composite alpha flags value {:#x}", supported);
+    return VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; // Neither advertised: request opaque regardless
+}
+
 } // Anonymous namespace

 Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_,
@@ -155,6 +167,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
    const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
    const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};

+    const VkCompositeAlphaFlagBitsKHR alpha_flags{ChooseAlphaFlags(capabilities)};
    const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
    present_mode = ChooseSwapPresentMode(present_modes);

@@ -185,7 +198,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = nullptr,
        .preTransform = capabilities.currentTransform,
-        .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
+        .compositeAlpha = alpha_flags,
        .presentMode = present_mode,
        .clipped = VK_FALSE,
        .oldSwapchain = nullptr,
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1,10 +1,11 @@
 // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-3.0-or-later

 #include <algorithm>
 #include <array>
 #include <span>
 #include <vector>
+#include <boost/container/small_vector.hpp>

 #include "common/bit_cast.h"
 #include "common/bit_util.h"
@@ -1343,14 +1344,31 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag

 void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
                           std::span<const VideoCommon::BufferImageCopy> copies) {
+    std::array buffer_handles{
+        buffer,
+    };
+    std::array buffer_offsets{
+        offset,
+    };
+    DownloadMemory(buffer_handles, buffer_offsets, copies);
+}
+
+void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceSize> offsets_span,
+                           std::span<const VideoCommon::BufferImageCopy> copies) {
    const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
    if (is_rescaled) {
        ScaleDown();
    }
-    std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
+    boost::container::small_vector<VkBuffer, 1> buffers_vector{};
+    boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies;
+    for (size_t index = 0; index < buffers_span.size(); index++) {
+        buffers_vector.emplace_back(buffers_span[index]);
+        vk_copies.emplace_back(
+            TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
+    }
    scheduler->RequestOutsideRenderPassOperationContext();
-    scheduler->Record([buffer, image = *original_image, aspect_mask = aspect_mask,
-                       vk_copies](vk::CommandBuffer cmdbuf) {
+    scheduler->Record([buffers = std::move(buffers_vector), image = *original_image,
+                       aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
        const VkImageMemoryBarrier read_barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
@@ -1369,6 +1387,20 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
                .layerCount = VK_REMAINING_ARRAY_LAYERS,
            },
        };
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                               0, read_barrier);
+
+        for (size_t index = 0; index < buffers.size(); index++) {
+            cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
+                                     vk_copies[index]);
+        }
+
+        const VkMemoryBarrier memory_write_barrier{
+            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+        };
        const VkImageMemoryBarrier image_write_barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
@@ -1387,15 +1419,6 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
                .layerCount = VK_REMAINING_ARRAY_LAYERS,
            },
        };
-        const VkMemoryBarrier memory_write_barrier{
-            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
-            .pNext = nullptr,
-            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
-            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
-        };
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                               0, read_barrier);
-        cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                               0, memory_write_barrier, nullptr, image_write_barrier);
    });
@@ -1405,7 +1428,13 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
 }

 void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
-    DownloadMemory(map.buffer, map.offset, copies);
+    std::array buffers{
+        map.buffer,
+    };
+    std::array offsets{
+        map.offset,
+    };
+    DownloadMemory(buffers, offsets, copies);
 }

 bool Image::IsRescaled() const noexcept {
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -1,5 +1,5 @@
 // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
+// SPDX-License-Identifier: GPL-3.0-or-later

 #pragma once

@@ -141,6 +141,9 @@ public:
    void DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
                        std::span<const VideoCommon::BufferImageCopy> copies);

+    void DownloadMemory(std::span<VkBuffer> buffers, std::span<VkDeviceSize> offsets,
+                        std::span<const VideoCommon::BufferImageCopy> copies);
+
    void DownloadMemory(const StagingBufferRef& map,
                        std::span<const VideoCommon::BufferImageCopy> copies);

@@ -371,6 +374,7 @@ struct TextureCacheParams {
    using Sampler = Vulkan::Sampler;
    using Framebuffer = Vulkan::Framebuffer;
    using AsyncBuffer = Vulkan::StagingBufferRef;
+    using BufferType = VkBuffer;
 };

 using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1,9 +1,10 @@
-// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later

 #pragma once

 #include <unordered_set>
+#include <boost/container/small_vector.hpp>

 #include "common/alignment.h"
 #include "common/settings.h"
@@ -17,15 +18,10 @@

 namespace VideoCommon {

-using Tegra::Texture::SwizzleSource;
-using Tegra::Texture::TextureType;
 using Tegra::Texture::TICEntry;
 using Tegra::Texture::TSCEntry;
 using VideoCore::Surface::GetFormatType;
-using VideoCore::Surface::IsCopyCompatible;
 using VideoCore::Surface::PixelFormat;
-using VideoCore::Surface::PixelFormatFromDepthFormat;
-using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
 using VideoCore::Surface::SurfaceType;
 using namespace Common::Literals;

@@ -143,6 +139,13 @@ void TextureCache<P>::TickFrame() {
    runtime.TickFrame();
    critical_gc = 0;
    ++frame_tick;
+
+    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
+        for (auto& buffer : async_buffers_death_ring) {
+            runtime.FreeDeferredStagingBuffer(buffer);
+        }
+        async_buffers_death_ring.clear();
+    }
 }

 template <class P>
@@ -661,25 +664,39 @@ template <class P>
 void TextureCache<P>::CommitAsyncFlushes() {
    // This is intentionally passing the value by copy
    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        const std::span<const ImageId> download_ids = uncommitted_downloads;
+        auto& download_ids = uncommitted_downloads;
        if (download_ids.empty()) {
            committed_downloads.emplace_back(std::move(uncommitted_downloads));
            uncommitted_downloads.clear();
-            async_buffers.emplace_back(std::optional<AsyncBuffer>{});
+            async_buffers.emplace_back(std::move(uncommitted_async_buffers));
+            uncommitted_async_buffers.clear();
            return;
        }
        size_t total_size_bytes = 0;
-        for (const ImageId image_id : download_ids) {
-            total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
+        size_t last_async_buffer_id = uncommitted_async_buffers.size();
+        bool any_none_dma = false;
+        for (PendingDownload& download_info : download_ids) {
+            if (download_info.is_swizzle) {
+                total_size_bytes +=
+                    Common::AlignUp(slot_images[download_info.object_id].unswizzled_size_bytes, 64);
+                any_none_dma = true;
+                download_info.async_buffer_id = last_async_buffer_id;
+            }
        }
-        auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
-        for (const ImageId image_id : download_ids) {
-            Image& image = slot_images[image_id];
-            const auto copies = FullDownloadCopies(image.info);
-            image.DownloadMemory(download_map, copies);
-            download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
+        if (any_none_dma) {
+            auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
+            for (const PendingDownload& download_info : download_ids) {
+                if (download_info.is_swizzle) {
+                    Image& image = slot_images[download_info.object_id];
+                    const auto copies = FullDownloadCopies(image.info);
+                    image.DownloadMemory(download_map, copies);
+                    download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
+                }
+            }
+            uncommitted_async_buffers.emplace_back(download_map);
        }
-        async_buffers.emplace_back(download_map);
+        async_buffers.emplace_back(std::move(uncommitted_async_buffers));
+        uncommitted_async_buffers.clear();
    }
    committed_downloads.emplace_back(std::move(uncommitted_downloads));
    uncommitted_downloads.clear();
@@ -691,39 +708,57 @@ void TextureCache<P>::PopAsyncFlushes() {
        return;
    }
    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        const std::span<const ImageId> download_ids = committed_downloads.front();
+        const auto& download_ids = committed_downloads.front();
        if (download_ids.empty()) {
            committed_downloads.pop_front();
            async_buffers.pop_front();
            return;
        }
-        auto download_map = *async_buffers.front();
-        std::span<u8> download_span = download_map.mapped_span;
+        auto download_map = std::move(async_buffers.front());
        for (size_t i = download_ids.size(); i > 0; i--) {
-            const ImageBase& image = slot_images[download_ids[i - 1]];
-            const auto copies = FullDownloadCopies(image.info);
-            download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
-            std::span<u8> download_span_alt = download_span.subspan(download_map.offset);
-            SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt,
-                         swizzle_data_buffer);
+            auto& download_info = download_ids[i - 1];
+            auto& download_buffer = download_map[download_info.async_buffer_id];
+            if (download_info.is_swizzle) {
+                const ImageBase& image = slot_images[download_info.object_id];
+                const auto copies = FullDownloadCopies(image.info);
+                download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
+                std::span<u8> download_span =
+                    download_buffer.mapped_span.subspan(download_buffer.offset);
+                SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
+                             swizzle_data_buffer);
+            } else {
+                const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id];
+                std::span<u8> download_span =
+                    download_buffer.mapped_span.subspan(download_buffer.offset);
+                gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(),
+                                             buffer_info.size);
+                slot_buffer_downloads.erase(download_info.object_id);
+            }
+        }
+        for (auto& download_buffer : download_map) {
+            async_buffers_death_ring.emplace_back(download_buffer);
        }
-        runtime.FreeDeferredStagingBuffer(download_map);
        committed_downloads.pop_front();
        async_buffers.pop_front();
    } else {
-        const std::span<const ImageId> download_ids = committed_downloads.front();
+        const auto& download_ids = committed_downloads.front();
        if (download_ids.empty()) {
            committed_downloads.pop_front();
            return;
        }
        size_t total_size_bytes = 0;
-        for (const ImageId image_id : download_ids) {
-            total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
+        for (const PendingDownload& download_info : download_ids) {
+            if (download_info.is_swizzle) {
+                total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes;
+            }
        }
        auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
        const size_t original_offset = download_map.offset;
-        for (const ImageId image_id : download_ids) {
-            Image& image = slot_images[image_id];
+        for (const PendingDownload& download_info : download_ids) {
+            if (!download_info.is_swizzle) {
+                continue;
+            }
+            Image& image = slot_images[download_info.object_id];
            const auto copies = FullDownloadCopies(image.info);
            image.DownloadMemory(download_map, copies);
            download_map.offset += image.unswizzled_size_bytes;
@@ -732,8 +767,11 @@ void TextureCache<P>::PopAsyncFlushes() {
        runtime.Finish();
        download_map.offset = original_offset;
        std::span<u8> download_span = download_map.mapped_span;
-        for (const ImageId image_id : download_ids) {
-            const ImageBase& image = slot_images[image_id];
+        for (const PendingDownload& download_info : download_ids) {
+            if (!download_info.is_swizzle) {
+                continue;
+            }
+            const ImageBase& image = slot_images[download_info.object_id];
            const auto copies = FullDownloadCopies(image.info);
            SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
                         swizzle_data_buffer);
@@ -833,6 +871,33 @@ std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::Dm
    return {image, copy};
 }

+template <class P>
+void TextureCache<P>::DownloadImageIntoBuffer(typename TextureCache<P>::Image* image,
+                                              typename TextureCache<P>::BufferType buffer,
+                                              size_t buffer_offset,
+                                              std::span<const VideoCommon::BufferImageCopy> copies,
+                                              GPUVAddr address, size_t size) {
+    // Downloads `image` into `buffer`. Backends with async downloads also copy the data into a
+    // staging buffer so PopAsyncFlushes can later write it to GPU memory at `address`.
+    if constexpr (!IMPLEMENTS_ASYNC_DOWNLOADS) {
+        image->DownloadMemory(buffer, buffer_offset, copies);
+    } else {
+        // The staging buffer is appended further down, so its index within the uncommitted
+        // list is the list's current size.
+        const size_t staging_index = uncommitted_async_buffers.size();
+        const auto download_slot = slot_buffer_downloads.insert(BufferDownload{address, size});
+        uncommitted_downloads.emplace_back(PendingDownload{false, staging_index, download_slot});
+        // Staging allocation of `size` bytes; the `true` flag presumably requests a deferred
+        // buffer (confirm against the runtime's DownloadStagingBuffer).
+        auto staging = runtime.DownloadStagingBuffer(size, true);
+        uncommitted_async_buffers.emplace_back(staging);
+        // Entry 0 targets the caller's buffer and entry 1 the staging buffer; both get `copies`.
+        std::array buffers{buffer, staging.buffer};
+        std::array buffer_offsets{buffer_offset, staging.offset};
+        image->DownloadMemory(buffers, buffer_offsets, copies);
+    }
+}
+
 template <class P>
 void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
    if (False(image.flags & ImageFlagBits::CpuModified)) {
@@ -2209,7 +2274,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
    if (new_id) {
        const ImageViewBase& old_view = slot_image_views[new_id];
        if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
-            uncommitted_downloads.push_back(old_view.image_id);
+            const PendingDownload new_download{true, 0, old_view.image_id};
+            uncommitted_downloads.emplace_back(new_download);
        }
    }
    *old_id = new_id;
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later

 #pragma once
@@ -40,14 +40,9 @@ struct ChannelState;

 namespace VideoCommon {

-using Tegra::Texture::SwizzleSource;
 using Tegra::Texture::TICEntry;
 using Tegra::Texture::TSCEntry;
-using VideoCore::Surface::GetFormatType;
-using VideoCore::Surface::IsCopyCompatible;
 using VideoCore::Surface::PixelFormat;
-using VideoCore::Surface::PixelFormatFromDepthFormat;
-using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
 using namespace Common::Literals;

 struct ImageViewInOut {
@@ -119,6 +114,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
    using Sampler = typename P::Sampler;
    using Framebuffer = typename P::Framebuffer;
    using AsyncBuffer = typename P::AsyncBuffer;
+    using BufferType = typename P::BufferType;

    struct BlitImages {
        ImageId dst_id;
@@ -215,6 +211,10 @@ public:
        const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand,
        const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image);

+    void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset,
+                                 std::span<const VideoCommon::BufferImageCopy> copies,
+                                 GPUVAddr address = 0, size_t size = 0);
+
    /// Return true when a CPU region is modified from the GPU
    [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);

@@ -424,17 +424,32 @@ private:
    u64 critical_memory;
    size_t critical_gc;

+    struct BufferDownload {
+        GPUVAddr address;
+        size_t size;
+    };
+
+    struct PendingDownload {
+        bool is_swizzle;
+        size_t async_buffer_id;
+        SlotId object_id;
+    };
+
    SlotVector<Image> slot_images;
    SlotVector<ImageMapView> slot_map_views;
    SlotVector<ImageView> slot_image_views;
    SlotVector<ImageAlloc> slot_image_allocs;
    SlotVector<Sampler> slot_samplers;
    SlotVector<Framebuffer> slot_framebuffers;
+    SlotVector<BufferDownload> slot_buffer_downloads;

    // TODO: This data structure is not optimal and it should be reworked
-    std::vector<ImageId> uncommitted_downloads;
-    std::deque<std::vector<ImageId>> committed_downloads;
-    std::deque<std::optional<AsyncBuffer>> async_buffers;
+
+    std::vector<PendingDownload> uncommitted_downloads;
+    std::deque<std::vector<PendingDownload>> committed_downloads;
+    std::vector<AsyncBuffer> uncommitted_async_buffers;
+    std::deque<std::vector<AsyncBuffer>> async_buffers;
+    std::deque<AsyncBuffer> async_buffers_death_ring;

    struct LRUItemParams {
        using ObjectType = ImageId;
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -145,7 +145,6 @@
    FEATURE_NAME(robustness2, robustImageAccess2)                                                  \
    FEATURE_NAME(shader_demote_to_helper_invocation, shaderDemoteToHelperInvocation)               \
    FEATURE_NAME(shader_draw_parameters, shaderDrawParameters)                                     \
-    FEATURE_NAME(timeline_semaphore, timelineSemaphore)                                            \
    FEATURE_NAME(variable_pointer, variablePointers)                                               \
    FEATURE_NAME(variable_pointer, variablePointersStorageBuffer)

@@ -158,6 +157,7 @@
    FEATURE_NAME(provoking_vertex, provokingVertexLast)                                            \
    FEATURE_NAME(shader_float16_int8, shaderFloat16)                                               \
    FEATURE_NAME(shader_float16_int8, shaderInt8)                                                  \
+    FEATURE_NAME(timeline_semaphore, timelineSemaphore)                                            \
    FEATURE_NAME(transform_feedback, transformFeedback)                                            \
    FEATURE_NAME(uniform_buffer_standard_layout, uniformBufferStandardLayout)                      \
    FEATURE_NAME(vertex_input_dynamic_state, vertexInputDynamicState)
@@ -493,6 +493,10 @@ public:
        return extensions.shader_atomic_int64;
    }

+    bool HasTimelineSemaphore() const {
+        return features.timeline_semaphore.timelineSemaphore;
+    }
+
    /// Returns the minimum supported version of SPIR-V.
    u32 SupportedSpirvVersion() const {
        if (instance_version >= VK_API_VERSION_1_3) {
Author	SHA1	Message	Date
Liam	1b5c87ab6a	kernel: match calls to Register and Unregister	2023-04-29 21:52:26 -04:00
bunnei	fe57f39676	Merge pull request #10082 from FernandoS27/the-testers-really-love-chocolate Refactor Accelerate DMA and do downloads through TC.	2023-04-29 11:46:01 -07:00
Fernando Sahmkow	4bc5469f52	Texture Cache: Release stagging buffers on tick frame	2023-04-29 15:31:38 +02:00
Fernando Sahmkow	58d1c7c77a	Address Feedback & Clang Format	2023-04-29 00:18:21 +02:00
Fernando Sahmkow	56c9730a16	Maxwell3D: only update parameters on High	2023-04-29 00:18:21 +02:00
Fernando Sahmkow	e3a2ca96bd	Accelerate DMA: Use texture cache async downloads to perform the copies to host. WIP	2023-04-29 00:18:21 +02:00
Fernando Sahmkow	3fbee093b2	TextureCache: refactor DMA downloads to allow multiple buffers.	2023-04-29 00:18:21 +02:00
Fernando S	9bf19b04f6	Merge pull request #10051 from liamwhite/surface-capabilities vulkan: pick alpha composite flags based on available values	2023-04-24 12:37:13 +02:00
Fernando S	47cd0586ee	Merge pull request #10056 from vonchenplus/audout_u core: audio: return result when audio_out initialize failed	2023-04-24 12:36:52 +02:00
Fernando S	2311fa7c84	Merge pull request #10069 from liamwhite/log maxwell_3d: fix out of bounds array access in size estimation	2023-04-24 12:36:24 +02:00
Liam	eb7c2314f6	maxwell_3d: fix out of bounds array access in size estimation	2023-04-22 10:35:26 -04:00
Fernando S	d600183583	Merge pull request #10074 from Kelebek1/fermi_blit Account for a pre-added offset when using Corner sample mode for 2D blits	2023-04-22 12:06:00 +02:00
bunnei	0f1ff5f34e	Merge pull request #10076 from german77/TryPopMyFriend core: am: Demote TryPopFromFriendInvitationStorageChannel Log level	2023-04-21 23:15:07 -07:00
bunnei	d0e6eafe23	Merge pull request #10068 from twitchax/twitchax/dr_bind_address Allow passing `--bind-address` to dedicated room.	2023-04-21 23:13:51 -07:00
bunnei	74d203fbe3	Merge pull request #10060 from german77/no_dead core: hid: Remove deadzone of virtual controller	2023-04-21 23:13:21 -07:00
german77	7ffc42c397	core: am: Demote TryPopFromFriendInvitationStorageChannel Log level	2023-04-21 22:35:45 -06:00
Kelebek1	4e14b64bfc	Account for a pre-added offset when using Corner sample mode for 2D blits	2023-04-21 19:08:21 +01:00
bunnei	0cfeb2e8d7	Merge pull request #10057 from liamwhite/its-not-in-the-timeline vulkan: use plain fences when timeline semaphores are not available	2023-04-19 16:50:24 -07:00
Aaron Roney	79e32127b3	Run clang-format to fix all.	2023-04-19 17:52:09 +00:00
bunnei	799579c8d2	Merge pull request #10053 from german77/nfp_full service: nfp: Implement all interfaces	2023-04-19 10:36:28 -07:00
Aaron Roney	3e7af5fbd7	Fix formatting.	2023-04-19 16:26:49 +00:00
Aaron Roney	34d0d94df0	Allow passing `bind_address` to dedicated room.	2023-04-19 05:37:30 +00:00
FengChen	55a33342cc	core: audio: return result when audio_out initialize failed	2023-04-16 12:31:54 +08:00
german77	70a97fb5c7	core: hid: Remove deadzone of virtual controller	2023-04-15 18:41:09 -06:00
Liam	e3fb9b5e00	vulkan: use plain fences when timeline semaphores are not available	2023-04-14 22:53:37 -04:00
bunnei	e0895a8581	Merge pull request #10030 from Wollnashorn/botw-amd-fix shader_recompiler: Fix ImageGather rounding on AMD/Intel	2023-04-14 16:56:34 -07:00
Narr the Reg	07694609fb	Merge pull request #10055 from v1993/patch-1 input_common: minor fix to mouse movement	2023-04-14 17:05:45 -06:00
Valeri	60c4032b68	input_common: minor fix to mouse movement	2023-04-14 21:27:35 +03:00
Liam	e37e1d24f9	vulkan: pick alpha composite flags based on available values	2023-04-13 16:38:20 -04:00
Wollnashorn	c0e5ecc399	video_core: Enable ImageGather rounding fix on AMD open source drivers	2023-04-12 17:11:02 +02:00
Wollnashorn	82b78cde73	shader_recompiler: Use vector arithmetic rather than component-wise in ImageGatherSubpixelOffset Should be more efficient and better readable	2023-04-08 16:13:08 +02:00
Wollnashorn	fe91066f46	video_core: Enable ImageGather with subpixel offset on Intel	2023-04-08 16:12:44 +02:00
Wollnashorn	780240e697	shader_recompiler: Add subpixel offset for correct rounding at `ImageGather` On AMD a subpixel offset of 1/512 of the texel size is applied to the texture coordinates at a ImageGather call to ensure the rounding at the texel centers is done the same way as in Maxwell or other Nvidia architectures. See https://www.reedbeta.com/blog/texture-gathers-and-coordinate-precision/ for more details why this might be necessary. This should fix shadow artifacts at object edges in Zelda: Breath of the Wild (#9957, #6956).	2023-04-08 16:12:30 +02:00