Compare commits
33 Commits
| Author | SHA1 | Date |
|---|---|---|
| | 1b5c87ab6a | |
| | fe57f39676 | |
| | 4bc5469f52 | |
| | 58d1c7c77a | |
| | 56c9730a16 | |
| | e3a2ca96bd | |
| | 3fbee093b2 | |
| | 9bf19b04f6 | |
| | 47cd0586ee | |
| | 2311fa7c84 | |
| | eb7c2314f6 | |
| | d600183583 | |
| | 0f1ff5f34e | |
| | d0e6eafe23 | |
| | 74d203fbe3 | |
| | 7ffc42c397 | |
| | 4e14b64bfc | |
| | 0cfeb2e8d7 | |
| | 79e32127b3 | |
| | 799579c8d2 | |
| | 3e7af5fbd7 | |
| | 34d0d94df0 | |
| | 55a33342cc | |
| | 70a97fb5c7 | |
| | e3fb9b5e00 | |
| | e0895a8581 | |
| | 07694609fb | |
| | 60c4032b68 | |
| | e37e1d24f9 | |
| | c0e5ecc399 | |
| | 82b78cde73 | |
| | fe91066f46 | |
| | 780240e697 | |
@@ -293,6 +293,7 @@ struct System::Impl {
ASSERT(Kernel::KProcess::Initialize(main_process, system, "main",
Kernel::KProcess::ProcessType::Userland, resource_limit)
.IsSuccess());
Kernel::KProcess::Register(system.Kernel(), main_process);
kernel.MakeApplicationProcess(main_process);
const auto [load_result, load_parameters] = app_loader->Load(*main_process, system);
if (load_result != Loader::ResultStatus::Success) {
@@ -280,6 +280,10 @@ void EmulatedController::LoadVirtualGamepadParams() {
virtual_stick_params[Settings::NativeAnalog::LStick].Set("axis_y", 1);
virtual_stick_params[Settings::NativeAnalog::RStick].Set("axis_x", 2);
virtual_stick_params[Settings::NativeAnalog::RStick].Set("axis_y", 3);
virtual_stick_params[Settings::NativeAnalog::LStick].Set("deadzone", 0.0f);
virtual_stick_params[Settings::NativeAnalog::LStick].Set("range", 1.0f);
virtual_stick_params[Settings::NativeAnalog::RStick].Set("deadzone", 0.0f);
virtual_stick_params[Settings::NativeAnalog::RStick].Set("range", 1.0f);
}

void EmulatedController::ReloadInput() {
@@ -182,8 +182,8 @@ public:
explicit KAutoObjectWithList(KernelCore& kernel) : KAutoObject(kernel) {}

static int Compare(const KAutoObjectWithList& lhs, const KAutoObjectWithList& rhs) {
const u64 lid = lhs.GetId();
const u64 rid = rhs.GetId();
const uintptr_t lid = reinterpret_cast<uintptr_t>(std::addressof(lhs));
const uintptr_t rid = reinterpret_cast<uintptr_t>(std::addressof(rhs));

if (lid < rid) {
return -1;
@@ -95,7 +95,7 @@ struct KernelCore::Impl {
pt_heap_region.GetSize());
}

InitializeHackSharedMemory();
InitializeHackSharedMemory(kernel);
RegisterHostThread(nullptr);
}

@@ -216,10 +216,12 @@ struct KernelCore::Impl {
auto* main_thread{Kernel::KThread::Create(system.Kernel())};
main_thread->SetCurrentCore(core);
ASSERT(Kernel::KThread::InitializeMainThread(system, main_thread, core).IsSuccess());
KThread::Register(system.Kernel(), main_thread);

auto* idle_thread{Kernel::KThread::Create(system.Kernel())};
idle_thread->SetCurrentCore(core);
ASSERT(Kernel::KThread::InitializeIdleThread(system, idle_thread, core).IsSuccess());
KThread::Register(system.Kernel(), idle_thread);

schedulers[i]->Initialize(main_thread, idle_thread, core);
}

@@ -230,6 +232,7 @@ struct KernelCore::Impl {
const Core::Timing::CoreTiming& core_timing) {
system_resource_limit = KResourceLimit::Create(system.Kernel());
system_resource_limit->Initialize(&core_timing);
KResourceLimit::Register(kernel, system_resource_limit);

const auto sizes{memory_layout->GetTotalAndKernelMemorySizes()};
const auto total_size{sizes.first};

@@ -355,6 +358,7 @@ struct KernelCore::Impl {
ASSERT(KThread::InitializeHighPriorityThread(system, shutdown_threads[core_id], {}, {},
core_id)
.IsSuccess());
KThread::Register(system.Kernel(), shutdown_threads[core_id]);
}
}

@@ -729,7 +733,7 @@ struct KernelCore::Impl {
memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize());
}

void InitializeHackSharedMemory() {
void InitializeHackSharedMemory(KernelCore& kernel) {
// Setup memory regions for emulated processes
// TODO(bunnei): These should not be hardcoded regions initialized within the kernel
constexpr std::size_t hid_size{0x40000};

@@ -746,14 +750,23 @@ struct KernelCore::Impl {

hid_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, hid_size);
KSharedMemory::Register(kernel, hid_shared_mem);

font_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, font_size);
KSharedMemory::Register(kernel, font_shared_mem);

irs_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, irs_size);
KSharedMemory::Register(kernel, irs_shared_mem);

time_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, time_size);
KSharedMemory::Register(kernel, time_shared_mem);

hidbus_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
Svc::MemoryPermission::Read, hidbus_size);
KSharedMemory::Register(kernel, hidbus_shared_mem);
}

std::mutex registered_objects_lock;

@@ -1072,12 +1085,15 @@ static std::jthread RunHostThreadFunc(KernelCore& kernel, KProcess* process,
// Commit the thread reservation.
thread_reservation.Commit();

// Register the thread.
KThread::Register(kernel, thread);

return std::jthread(
[&kernel, thread, thread_name{std::move(thread_name)}, func{std::move(func)}] {
// Set the thread name.
Common::SetCurrentThreadName(thread_name.c_str());

// Register the thread.
// Set the thread as current.
kernel.RegisterHostThread(thread);

// Run the callback.

@@ -1099,6 +1115,9 @@ std::jthread KernelCore::RunOnHostCoreProcess(std::string&& process_name,
// Ensure that we don't hold onto any extra references.
SCOPE_EXIT({ process->Close(); });

// Register the new process.
KProcess::Register(*this, process);

// Run the host thread.
return RunHostThreadFunc(*this, process, std::move(process_name), std::move(func));
}

@@ -1124,6 +1143,9 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function
// Ensure that we don't hold onto any extra references.
SCOPE_EXIT({ process->Close(); });

// Register the new process.
KProcess::Register(*this, process);

// Reserve a new thread from the process resource limit.
KScopedResourceReservation thread_reservation(process, LimitableResource::ThreadCountMax);
ASSERT(thread_reservation.Succeeded());

@@ -1136,6 +1158,9 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function
// Commit the thread reservation.
thread_reservation.Commit();

// Register the new thread.
KThread::Register(*this, thread);

// Begin running the thread.
ASSERT(R_SUCCEEDED(thread->Run()));
}
@@ -1807,7 +1807,7 @@ void IApplicationFunctions::GetFriendInvitationStorageChannelEvent(HLERequestCon
}

void IApplicationFunctions::TryPopFromFriendInvitationStorageChannel(HLERequestContext& ctx) {
LOG_WARNING(Service_AM, "(STUBBED) called");
LOG_DEBUG(Service_AM, "(STUBBED) called");

IPC::ResponseBuilder rb{ctx, 2};
rb.Push(AM::ResultNoDataInChannel);
@@ -49,12 +49,6 @@ public:
};
// clang-format on
RegisterHandlers(functions);

if (impl->GetSystem()
.Initialize(device_name, in_params, handle, applet_resource_user_id)
.IsError()) {
LOG_ERROR(Service_Audio, "Failed to initialize the AudioOut System!");
}
}

~IAudioOut() override {

@@ -287,6 +281,14 @@ void AudOutU::OpenAudioOut(HLERequestContext& ctx) {

auto audio_out = std::make_shared<IAudioOut>(system, *impl, new_session_id, device_name,
in_params, handle, applet_resource_user_id);
result = audio_out->GetImpl()->GetSystem().Initialize(device_name, in_params, handle,
applet_resource_user_id);
if (result.IsError()) {
LOG_ERROR(Service_Audio, "Failed to initialize the AudioOut System!");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(result);
return;
}

impl->sessions[new_session_id] = audio_out->GetImpl();
impl->applet_resource_user_ids[new_session_id] = applet_resource_user_id;
@@ -156,6 +156,7 @@ public:

auto* session = Kernel::KSession::Create(kernel);
session->Initialize(nullptr, 0);
Kernel::KSession::Register(kernel, session);

auto next_manager = std::make_shared<Service::SessionRequestManager>(
kernel, manager->GetServerManager());
@@ -25,6 +25,9 @@ ServiceContext::ServiceContext(Core::System& system_, std::string name_)
Kernel::KProcess::ProcessType::KernelInternal,
kernel.GetSystemResourceLimit())
.IsSuccess());

// Register the process.
Kernel::KProcess::Register(kernel, process);
process_created = true;
}
@@ -12,6 +12,9 @@ Mutex::Mutex(Core::System& system) : m_system(system) {
m_event = Kernel::KEvent::Create(system.Kernel());
m_event->Initialize(nullptr);

// Register the event.
Kernel::KEvent::Register(system.Kernel(), m_event);

ASSERT(R_SUCCEEDED(m_event->Signal()));
}
@@ -33,6 +33,9 @@ ServerManager::ServerManager(Core::System& system) : m_system{system}, m_serve_m
// Initialize event.
m_event = Kernel::KEvent::Create(system.Kernel());
m_event->Initialize(nullptr);

// Register event.
Kernel::KEvent::Register(system.Kernel(), m_event);
}

ServerManager::~ServerManager() {

@@ -160,6 +163,9 @@ Result ServerManager::ManageDeferral(Kernel::KEvent** out_event) {
// Initialize the event.
m_deferral_event->Initialize(nullptr);

// Register the event.
Kernel::KEvent::Register(m_system.Kernel(), m_deferral_event);

// Set the output.
*out_event = m_deferral_event;
@@ -64,6 +64,9 @@ Result ServiceManager::RegisterService(std::string name, u32 max_sessions,
auto* port = Kernel::KPort::Create(kernel);
port->Initialize(ServerSessionCountMax, false, 0);

// Register the port.
Kernel::KPort::Register(kernel, port);

service_ports.emplace(name, port);
registered_services.emplace(name, handler);
if (deferral_event) {
@@ -49,6 +49,9 @@ void Controller::CloneCurrentObject(HLERequestContext& ctx) {
// Commit the session reservation.
session_reservation.Commit();

// Register the session.
Kernel::KSession::Register(system.Kernel(), session);

// Register with server manager.
session_manager->GetServerManager().RegisterSession(&session->GetServerSession(),
session_manager);
@@ -49,6 +49,7 @@ static void PrintHelp(const char* argv0) {
" [options] <filename>\n"
"--room-name The name of the room\n"
"--room-description The room description\n"
"--bind-address The bind address for the room\n"
"--port The port used for the room\n"
"--max_members The maximum number of players for this room\n"
"--password The password for the room\n"

@@ -195,6 +196,7 @@ int main(int argc, char** argv) {
std::string web_api_url;
std::string ban_list_file;
std::string log_file = "yuzu-room.log";
std::string bind_address;
u64 preferred_game_id = 0;
u32 port = Network::DefaultRoomPort;
u32 max_members = 16;

@@ -203,6 +205,7 @@ int main(int argc, char** argv) {
static struct option long_options[] = {
{"room-name", required_argument, 0, 'n'},
{"room-description", required_argument, 0, 'd'},
{"bind-address", required_argument, 0, 's'},
{"port", required_argument, 0, 'p'},
{"max_members", required_argument, 0, 'm'},
{"password", required_argument, 0, 'w'},

@@ -222,7 +225,8 @@ int main(int argc, char** argv) {
InitializeLogging(log_file);

while (optind < argc) {
int arg = getopt_long(argc, argv, "n:d:p:m:w:g:u:t:a:i:l:hv", long_options, &option_index);
int arg =
getopt_long(argc, argv, "n:d:s:p:m:w:g:u:t:a:i:l:hv", long_options, &option_index);
if (arg != -1) {
switch (static_cast<char>(arg)) {
case 'n':

@@ -231,6 +235,9 @@ int main(int argc, char** argv) {
case 'd':
room_description.assign(optarg);
break;
case 's':
bind_address.assign(optarg);
break;
case 'p':
port = strtoul(optarg, &endarg, 0);
break;

@@ -295,6 +302,9 @@ int main(int argc, char** argv) {
PrintHelp(argv[0]);
return -1;
}
if (bind_address.empty()) {
LOG_INFO(Network, "Bind address is empty: defaulting to 0.0.0.0");
}
if (port > UINT16_MAX) {
LOG_ERROR(Network, "Port needs to be in the range 0 - 65535!");
PrintHelp(argv[0]);

@@ -358,8 +368,8 @@ int main(int argc, char** argv) {
if (auto room = network.GetRoom().lock()) {
AnnounceMultiplayerRoom::GameInfo preferred_game_info{.name = preferred_game,
.id = preferred_game_id};
if (!room->Create(room_name, room_description, "", port, password, max_members, username,
preferred_game_info, std::move(verify_backend), ban_list,
if (!room->Create(room_name, room_description, bind_address, port, password, max_members,
username, preferred_game_info, std::move(verify_backend), ban_list,
enable_yuzu_mods)) {
LOG_INFO(Network, "Failed to create room: ");
return -1;
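Tying the room hunks above together: the new --bind-address option is parsed through the added 's' case (the getopt_long short-option string gains "s:"), stored in bind_address, and passed to Room::Create in place of the previously hard-coded empty string; when the option is omitted, the room logs that it falls back to 0.0.0.0.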
@@ -135,7 +135,7 @@ void Mouse::Move(int x, int y, int center_x, int center_y) {

auto mouse_change =
(Common::MakeVec(x, y) - Common::MakeVec(center_x, center_y)).Cast<float>();
last_motion_change += {-mouse_change.y, -mouse_change.x, last_motion_change.z};
last_motion_change += {-mouse_change.y, -mouse_change.x, 0};

const auto move_distance = mouse_change.Length();
if (move_distance == 0) {
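The arithmetic behind the one-line change above: because the statement uses +=, passing last_motion_change.z as the third component added the current z value onto itself on every mouse move, doubling it each call; passing 0 applies only the new x/y deltas and leaves the accumulated z axis untouched.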
@@ -143,6 +143,21 @@ IR::Inst* PrepareSparse(IR::Inst& inst) {
}
return sparse_inst;
}

std::string ImageGatherSubpixelOffset(const IR::TextureInstInfo& info, std::string_view texture,
std::string_view coords) {
switch (info.type) {
case TextureType::Color2D:
case TextureType::Color2DRect:
return fmt::format("{}+vec2(0.001953125)/vec2(textureSize({}, 0))", coords, texture);
case TextureType::ColorArray2D:
case TextureType::ColorCube:
return fmt::format("vec3({0}.xy+vec2(0.001953125)/vec2(textureSize({1}, 0)),{0}.z)", coords,
texture);
default:
return std::string{coords};
}
}
} // Anonymous namespace

void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,

@@ -340,6 +355,13 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
ctx.AddU1("{}=true;", *sparse_inst);
}
std::string coords_with_subpixel_offset;
if (ctx.profile.need_gather_subpixel_offset) {
// Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on
// AMD hardware as on Maxwell or other Nvidia architectures.
coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords);
coords = coords_with_subpixel_offset;
}
if (!sparse_inst || !supports_sparse) {
if (offset.IsEmpty()) {
ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords,

@@ -387,6 +409,13 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
ctx.AddU1("{}=true;", *sparse_inst);
}
std::string coords_with_subpixel_offset;
if (ctx.profile.need_gather_subpixel_offset) {
// Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on
// AMD hardware as on Maxwell or other Nvidia architectures.
coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords);
coords = coords_with_subpixel_offset;
}
if (!sparse_inst || !supports_sparse) {
if (offset.IsEmpty()) {
ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref);
@@ -261,6 +261,30 @@ Id BitTest(EmitContext& ctx, Id mask, Id bit) {
const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))};
return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value);
}

Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info, Id texture,
Id coords) {
// Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on
// AMD hardware as on Maxwell or other Nvidia architectures.
const auto calculate_coords{[&](size_t dim) {
const Id nudge{ctx.Const(0x1p-9f)};
const Id image_size{ctx.OpImageQuerySizeLod(ctx.U32[dim], texture, ctx.u32_zero_value)};
Id offset{dim == 2 ? ctx.ConstantComposite(ctx.F32[dim], nudge, nudge)
: ctx.ConstantComposite(ctx.F32[dim], nudge, nudge, ctx.f32_zero_value)};
offset = ctx.OpFDiv(ctx.F32[dim], offset, ctx.OpConvertUToF(ctx.F32[dim], image_size));
return ctx.OpFAdd(ctx.F32[dim], coords, offset);
}};
switch (info.type) {
case TextureType::Color2D:
case TextureType::Color2DRect:
return calculate_coords(2);
case TextureType::ColorArray2D:
case TextureType::ColorCube:
return calculate_coords(3);
default:
return coords;
}
}
} // Anonymous namespace

Id EmitBindlessImageSampleImplicitLod(EmitContext&) {

@@ -423,6 +447,9 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
const IR::Value& offset, const IR::Value& offset2) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const ImageOperands operands(ctx, offset, offset2);
if (ctx.profile.need_gather_subpixel_offset) {
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
}
return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component),
operands.MaskOptional(), operands.Span());

@@ -432,6 +459,9 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
const IR::Value& offset, const IR::Value& offset2, Id dref) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const ImageOperands operands(ctx, offset, offset2);
if (ctx.profile.need_gather_subpixel_offset) {
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
}
return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(),
operands.Span());
@@ -52,6 +52,10 @@ struct Profile {
bool need_declared_frag_colors{};
/// Prevents fast math optimizations that may cause inaccuracies
bool need_fastmath_off{};
/// Some GPU vendors use a different rounding precision when calculating texture pixel
/// coordinates with the 16.8 format in the ImageGather instruction than the Maxwell
/// architecture. Applying an offset does fix this mismatching rounding behaviour.
bool need_gather_subpixel_offset{};

/// OpFClamp is broken and OpFMax + OpFMin should be used instead
bool has_broken_spirv_clamp{};
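Checking the constants above against that comment: the GLSL path nudges the coordinate by the literal 0.001953125 and the SPIR-V path by 0x1p-9f, both exactly 1/512; after dividing by textureSize() that is 1/512 of a texel, i.e. half of one step of the 16.8 fixed-point subpixel grid the comment refers to, which is presumably just enough to push AMD's rounding onto the same texel Maxwell picks.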
@@ -77,6 +77,14 @@ void Fermi2D::Blit() {
const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 &&
src.format != regs.dst.format;

auto srcX = args.src_x0;
auto srcY = args.src_y0;
if (args.sample_mode.origin == Origin::Corner) {
srcX -= (args.du_dx >> 33) << 32;
srcY -= (args.dv_dy >> 33) << 32;
}

Config config{
.operation = regs.operation,
.filter = args.sample_mode.filter,

@@ -86,10 +94,10 @@ void Fermi2D::Blit() {
.dst_y0 = args.dst_y0,
.dst_x1 = args.dst_x0 + args.dst_width,
.dst_y1 = args.dst_y0 + args.dst_height,
.src_x0 = static_cast<s32>(args.src_x0 >> 32),
.src_y0 = static_cast<s32>(args.src_y0 >> 32),
.src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
.src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
.src_x0 = static_cast<s32>(srcX >> 32),
.src_y0 = static_cast<s32>(srcY >> 32),
.src_x1 = static_cast<s32>((srcX + args.du_dx * args.dst_width) >> 32),
.src_y1 = static_cast<s32>((srcY + args.dv_dy * args.dst_height) >> 32),
};

const auto need_align_to_pitch =
@@ -4,6 +4,7 @@
#include <cstring>
#include <optional>
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/core.h"

@@ -222,6 +223,9 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
}

void Maxwell3D::RefreshParametersImpl() {
if (!Settings::IsGPULevelHigh()) {
return;
}
size_t current_index = 0;
for (auto& segment : macro_segments) {
if (segment.first == 0) {

@@ -259,12 +263,13 @@ u32 Maxwell3D::GetMaxCurrentVertices() {
size_t Maxwell3D::EstimateIndexBufferSize() {
GPUVAddr start_address = regs.index_buffer.StartAddress();
GPUVAddr end_address = regs.index_buffer.EndAddress();
static constexpr std::array<size_t, 4> max_sizes = {
std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(),
std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
static constexpr std::array<size_t, 3> max_sizes = {std::numeric_limits<u8>::max(),
std::numeric_limits<u16>::max(),
std::numeric_limits<u32>::max()};
const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
const size_t log2_byte_size = Common::Log2Ceil64(byte_size);
return std::min<size_t>(
memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) /
memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[log2_byte_size]) /
byte_size,
static_cast<size_t>(end_address - start_address));
}
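A quick check of the new lookup, assuming FormatSizeInBytes() reports 1, 2 or 4 bytes per index: the old max_sizes[byte_size] put the 1-byte format on the u16 entry and indexed one past the end of the four-element table for 4-byte indices, while Common::Log2Ceil64 maps 1 to 0, 2 to 1 and 4 to 2, so the trimmed three-entry table now pairs each format with its u8, u16 and u32 limit respectively.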
@@ -176,6 +176,10 @@ public:
return vendor_name == "ATI Technologies Inc.";
}

bool IsIntel() const {
return vendor_name == "Intel";
}

bool CanReportMemoryUsage() const {
return can_report_memory;
}
@@ -1287,8 +1287,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
}
const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing
: VideoCommon::ObtainBufferOperation::MarkAsWritten;
const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
const auto [buffer, offset] =
buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);

@@ -1299,7 +1298,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
if constexpr (IS_IMAGE_UPLOAD) {
image->UploadMemory(buffer->Handle(), offset, copy_span);
} else {
image->DownloadMemory(buffer->Handle(), offset, copy_span);
texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
buffer_operand.address, buffer_size);
}
return true;
}
@@ -218,6 +218,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.lower_left_origin_mode = true,
.need_declared_frag_colors = true,
.need_fastmath_off = device.NeedsFastmathOff(),
.need_gather_subpixel_offset = device.IsAmd() || device.IsIntel(),

.has_broken_spirv_clamp = true,
.has_broken_unsigned_image_offsets = true,
@@ -803,30 +803,40 @@ void Image::UploadMemory(const ImageBufferMap& map,

void Image::DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies) {
std::array buffer_handles{buffer_handle};
std::array buffer_offsets{buffer_offset};
DownloadMemory(buffer_handles, buffer_offsets, copies);
}

void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> buffer_offsets,
std::span<const VideoCommon::BufferImageCopy> copies) {
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown();
}
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle);
glPixelStorei(GL_PACK_ALIGNMENT, 1);
for (size_t i = 0; i < buffer_handles.size(); i++) {
auto& buffer_handle = buffer_handles[i];
glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle);
glPixelStorei(GL_PACK_ALIGNMENT, 1);

u32 current_row_length = std::numeric_limits<u32>::max();
u32 current_image_height = std::numeric_limits<u32>::max();
u32 current_row_length = std::numeric_limits<u32>::max();
u32 current_image_height = std::numeric_limits<u32>::max();

for (const VideoCommon::BufferImageCopy& copy : copies) {
if (copy.image_subresource.base_level >= gl_num_levels) {
continue;
for (const VideoCommon::BufferImageCopy& copy : copies) {
if (copy.image_subresource.base_level >= gl_num_levels) {
continue;
}
if (current_row_length != copy.buffer_row_length) {
current_row_length = copy.buffer_row_length;
glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
}
if (current_image_height != copy.buffer_image_height) {
current_image_height = copy.buffer_image_height;
glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
}
CopyImageToBuffer(copy, buffer_offsets[i]);
}
if (current_row_length != copy.buffer_row_length) {
current_row_length = copy.buffer_row_length;
glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
}
if (current_image_height != copy.buffer_image_height) {
current_image_height = copy.buffer_image_height;
glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
}
CopyImageToBuffer(copy, buffer_offset);
}
if (is_rescaled) {
ScaleUp(true);
@@ -215,6 +215,9 @@ public:
void DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);

void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies);

void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);

GLuint StorageHandle() noexcept;

@@ -376,6 +379,7 @@ struct TextureCacheParams {
using Sampler = OpenGL::Sampler;
using Framebuffer = OpenGL::Framebuffer;
using AsyncBuffer = u32;
using BufferType = GLuint;
};

using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
@@ -10,7 +10,14 @@

namespace Vulkan {

MasterSemaphore::MasterSemaphore(const Device& device) {
MasterSemaphore::MasterSemaphore(const Device& device_) : device(device_) {
if (!device.HasTimelineSemaphore()) {
static constexpr VkFenceCreateInfo fence_ci{
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0};
fence = device.GetLogical().CreateFence(fence_ci);
return;
}

static constexpr VkSemaphoreTypeCreateInfo semaphore_type_ci{
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
.pNext = nullptr,

@@ -42,4 +49,134 @@ MasterSemaphore::MasterSemaphore(const Device& device) {

MasterSemaphore::~MasterSemaphore() = default;

void MasterSemaphore::Refresh() {
if (!semaphore) {
// If we don't support timeline semaphores, there's nothing to refresh
return;
}

u64 this_tick{};
u64 counter{};
do {
this_tick = gpu_tick.load(std::memory_order_acquire);
counter = semaphore.GetCounter();
if (counter < this_tick) {
return;
}
} while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
std::memory_order_relaxed));
}

void MasterSemaphore::Wait(u64 tick) {
if (!semaphore) {
// If we don't support timeline semaphores, use an atomic wait
while (true) {
u64 current_value = gpu_tick.load(std::memory_order_relaxed);
if (current_value >= tick) {
return;
}
gpu_tick.wait(current_value);
}

return;
}

// No need to wait if the GPU is ahead of the tick
if (IsFree(tick)) {
return;
}

// Update the GPU tick and try again
Refresh();

if (IsFree(tick)) {
return;
}

// If none of the above is hit, fallback to a regular wait
while (!semaphore.Wait(tick)) {
}

Refresh();
}

VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick) {
if (semaphore) {
return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
} else {
return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
}
}

static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
};

VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick) {
const VkSemaphore timeline_semaphore = *semaphore;

const u32 num_signal_semaphores = signal_semaphore ? 2 : 1;
const std::array signal_values{host_tick, u64(0)};
const std::array signal_semaphores{timeline_semaphore, signal_semaphore};

const u32 num_wait_semaphores = wait_semaphore ? 2 : 1;
const std::array wait_values{host_tick - 1, u64(1)};
const std::array wait_semaphores{timeline_semaphore, wait_semaphore};

const VkTimelineSemaphoreSubmitInfo timeline_si{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.pNext = nullptr,
.waitSemaphoreValueCount = num_wait_semaphores,
.pWaitSemaphoreValues = wait_values.data(),
.signalSemaphoreValueCount = num_signal_semaphores,
.pSignalSemaphoreValues = signal_values.data(),
};
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &timeline_si,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1,
.pCommandBuffers = cmdbuf.address(),
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = signal_semaphores.data(),
};

return device.GetGraphicsQueue().Submit(submit_info);
}

VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick) {
const u32 num_signal_semaphores = signal_semaphore ? 1 : 0;
const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;

const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = nullptr,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = &wait_semaphore,
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1,
.pCommandBuffers = cmdbuf.address(),
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = &signal_semaphore,
};

auto result = device.GetGraphicsQueue().Submit(submit_info, *fence);

if (result == VK_SUCCESS) {
fence.Wait();
fence.Reset();
gpu_tick.store(host_tick);
gpu_tick.notify_all();
}

return result;
}

} // namespace Vulkan
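On the fence fallback introduced above: both submission paths end with gpu_tick reaching host_tick — the timeline path signals that value on the semaphore for Refresh() to read back, while SubmitQueueFence() waits on the fence, stores host_tick into gpu_tick and calls notify_all(), which is exactly what the atomic-wait branch of Wait() blocks on when timeline semaphores are unavailable.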
@@ -4,6 +4,8 @@
#pragma once

#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>

#include "common/common_types.h"

@@ -29,11 +31,6 @@ public:
return gpu_tick.load(std::memory_order_acquire);
}

/// Returns the timeline semaphore handle.
[[nodiscard]] VkSemaphore Handle() const noexcept {
return *semaphore;
}

/// Returns true when a tick has been hit by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
return KnownGpuTick() >= tick;

@@ -45,37 +42,24 @@ public:
}

/// Refresh the known GPU tick
void Refresh() {
u64 this_tick{};
u64 counter{};
do {
this_tick = gpu_tick.load(std::memory_order_acquire);
counter = semaphore.GetCounter();
if (counter < this_tick) {
return;
}
} while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
std::memory_order_relaxed));
}
void Refresh();

/// Waits for a tick to be hit on the GPU
void Wait(u64 tick) {
// No need to wait if the GPU is ahead of the tick
if (IsFree(tick)) {
return;
}
// Update the GPU tick and try again
Refresh();
if (IsFree(tick)) {
return;
}
// If none of the above is hit, fallback to a regular wait
while (!semaphore.Wait(tick)) {
}
Refresh();
}
void Wait(u64 tick);

/// Submits the device graphics queue, updating the tick as necessary
VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick);

private:
VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick);
VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
VkSemaphore wait_semaphore, u64 host_tick);

private:
const Device& device; ///< Device.
vk::Fence fence; ///< Fence.
vk::Semaphore semaphore; ///< Timeline semaphore.
std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
std::atomic<u64> current_tick{1}; ///< Current logical tick.
@@ -329,6 +329,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device

.lower_left_origin_mode = false,
.need_declared_frag_colors = false,
.need_gather_subpixel_offset = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY ||
driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
driver_id == VK_DRIVER_ID_MESA_RADV ||
driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS ||
driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA,

.has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS,
.has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
@@ -781,8 +781,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
}
const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing
: VideoCommon::ObtainBufferOperation::MarkAsWritten;
const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
const auto [buffer, offset] =
buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);

@@ -793,7 +792,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
if constexpr (IS_IMAGE_UPLOAD) {
image->UploadMemory(buffer->Handle(), offset, copy_span);
} else {
image->DownloadMemory(buffer->Handle(), offset, copy_span);
texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
buffer_operand.address, buffer_size);
}
return true;
}
@@ -212,45 +212,13 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
const u64 signal_value = master_semaphore->NextTick();
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
cmdbuf.End();
const VkSemaphore timeline_semaphore = master_semaphore->Handle();

const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
const std::array signal_values{signal_value, u64(0)};
const std::array signal_semaphores{timeline_semaphore, signal_semaphore};

const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
const std::array wait_values{signal_value - 1, u64(1)};
const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
};

const VkTimelineSemaphoreSubmitInfo timeline_si{
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.pNext = nullptr,
.waitSemaphoreValueCount = num_wait_semaphores,
.pWaitSemaphoreValues = wait_values.data(),
.signalSemaphoreValueCount = num_signal_semaphores,
.pSignalSemaphoreValues = signal_values.data(),
};
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &timeline_si,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1,
.pCommandBuffers = cmdbuf.address(),
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = signal_semaphores.data(),
};

if (on_submit) {
on_submit();
}

switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
switch (const VkResult result = master_semaphore->SubmitQueue(
cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
@@ -65,6 +65,18 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
return extent;
}

VkCompositeAlphaFlagBitsKHR ChooseAlphaFlags(const VkSurfaceCapabilitiesKHR& capabilities) {
if (capabilities.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR) {
return VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
} else if (capabilities.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) {
return VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR;
} else {
LOG_ERROR(Render_Vulkan, "Unknown composite alpha flags value {:#x}",
capabilities.supportedCompositeAlpha);
return VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
}
}

} // Anonymous namespace

Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_,

@@ -155,6 +167,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};

const VkCompositeAlphaFlagBitsKHR alpha_flags{ChooseAlphaFlags(capabilities)};
const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};
present_mode = ChooseSwapPresentMode(present_modes);

@@ -185,7 +198,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.preTransform = capabilities.currentTransform,
.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
.compositeAlpha = alpha_flags,
.presentMode = present_mode,
.clipped = VK_FALSE,
.oldSwapchain = nullptr,
@@ -1,10 +1,11 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-License-Identifier: GPL-3.0-or-later

#include <algorithm>
#include <array>
#include <span>
#include <vector>
#include <boost/container/small_vector.hpp>

#include "common/bit_cast.h"
#include "common/bit_util.h"

@@ -1343,14 +1344,31 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag

void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
std::span<const VideoCommon::BufferImageCopy> copies) {
std::array buffer_handles{
buffer,
};
std::array buffer_offsets{
offset,
};
DownloadMemory(buffer_handles, buffer_offsets, copies);
}

void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceSize> offsets_span,
std::span<const VideoCommon::BufferImageCopy> copies) {
const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
if (is_rescaled) {
ScaleDown();
}
std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
boost::container::small_vector<VkBuffer, 1> buffers_vector{};
boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies;
for (size_t index = 0; index < buffers_span.size(); index++) {
buffers_vector.emplace_back(buffers_span[index]);
vk_copies.emplace_back(
TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
}
scheduler->RequestOutsideRenderPassOperationContext();
scheduler->Record([buffer, image = *original_image, aspect_mask = aspect_mask,
vk_copies](vk::CommandBuffer cmdbuf) {
scheduler->Record([buffers = std::move(buffers_vector), image = *original_image,
aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,

@@ -1369,6 +1387,20 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);

for (size_t index = 0; index < buffers.size(); index++) {
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
vk_copies[index]);
}

const VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
const VkImageMemoryBarrier image_write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,

@@ -1387,15 +1419,6 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
const VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
});

@@ -1405,7 +1428,13 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
}

void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
DownloadMemory(map.buffer, map.offset, copies);
std::array buffers{
map.buffer,
};
std::array offsets{
map.offset,
};
DownloadMemory(buffers, offsets, copies);
}

bool Image::IsRescaled() const noexcept {
@@ -1,5 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

@@ -141,6 +141,9 @@ public:
void DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
std::span<const VideoCommon::BufferImageCopy> copies);

void DownloadMemory(std::span<VkBuffer> buffers, std::span<VkDeviceSize> offsets,
std::span<const VideoCommon::BufferImageCopy> copies);

void DownloadMemory(const StagingBufferRef& map,
std::span<const VideoCommon::BufferImageCopy> copies);

@@ -371,6 +374,7 @@ struct TextureCacheParams {
using Sampler = Vulkan::Sampler;
using Framebuffer = Vulkan::Framebuffer;
using AsyncBuffer = Vulkan::StagingBufferRef;
using BufferType = VkBuffer;
};

using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
@@ -1,9 +1,10 @@
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

#include <unordered_set>
#include <boost/container/small_vector.hpp>

#include "common/alignment.h"
#include "common/settings.h"

@@ -17,15 +18,10 @@

namespace VideoCommon {

using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TextureType;
using Tegra::Texture::TICEntry;
using Tegra::Texture::TSCEntry;
using VideoCore::Surface::GetFormatType;
using VideoCore::Surface::IsCopyCompatible;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::SurfaceType;
using namespace Common::Literals;

@@ -143,6 +139,13 @@ void TextureCache<P>::TickFrame() {
runtime.TickFrame();
critical_gc = 0;
++frame_tick;

if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
for (auto& buffer : async_buffers_death_ring) {
runtime.FreeDeferredStagingBuffer(buffer);
}
async_buffers_death_ring.clear();
}
}

template <class P>

@@ -661,25 +664,39 @@ template <class P>
void TextureCache<P>::CommitAsyncFlushes() {
// This is intentionally passing the value by copy
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
const std::span<const ImageId> download_ids = uncommitted_downloads;
auto& download_ids = uncommitted_downloads;
if (download_ids.empty()) {
committed_downloads.emplace_back(std::move(uncommitted_downloads));
uncommitted_downloads.clear();
async_buffers.emplace_back(std::optional<AsyncBuffer>{});
async_buffers.emplace_back(std::move(uncommitted_async_buffers));
uncommitted_async_buffers.clear();
return;
}
size_t total_size_bytes = 0;
for (const ImageId image_id : download_ids) {
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
size_t last_async_buffer_id = uncommitted_async_buffers.size();
bool any_none_dma = false;
for (PendingDownload& download_info : download_ids) {
if (download_info.is_swizzle) {
total_size_bytes +=
Common::AlignUp(slot_images[download_info.object_id].unswizzled_size_bytes, 64);
any_none_dma = true;
download_info.async_buffer_id = last_async_buffer_id;
}
}
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
for (const ImageId image_id : download_ids) {
Image& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
if (any_none_dma) {
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
for (const PendingDownload& download_info : download_ids) {
if (download_info.is_swizzle) {
Image& image = slot_images[download_info.object_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
}
}
uncommitted_async_buffers.emplace_back(download_map);
}
async_buffers.emplace_back(download_map);
async_buffers.emplace_back(std::move(uncommitted_async_buffers));
uncommitted_async_buffers.clear();
}
committed_downloads.emplace_back(std::move(uncommitted_downloads));
uncommitted_downloads.clear();

@@ -691,39 +708,57 @@ void TextureCache<P>::PopAsyncFlushes() {
return;
}
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
const std::span<const ImageId> download_ids = committed_downloads.front();
const auto& download_ids = committed_downloads.front();
if (download_ids.empty()) {
committed_downloads.pop_front();
async_buffers.pop_front();
return;
}
auto download_map = *async_buffers.front();
std::span<u8> download_span = download_map.mapped_span;
auto download_map = std::move(async_buffers.front());
for (size_t i = download_ids.size(); i > 0; i--) {
const ImageBase& image = slot_images[download_ids[i - 1]];
const auto copies = FullDownloadCopies(image.info);
download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
std::span<u8> download_span_alt = download_span.subspan(download_map.offset);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt,
swizzle_data_buffer);
auto& download_info = download_ids[i - 1];
auto& download_buffer = download_map[download_info.async_buffer_id];
if (download_info.is_swizzle) {
const ImageBase& image = slot_images[download_info.object_id];
const auto copies = FullDownloadCopies(image.info);
download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
std::span<u8> download_span =
download_buffer.mapped_span.subspan(download_buffer.offset);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
swizzle_data_buffer);
} else {
const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id];
std::span<u8> download_span =
download_buffer.mapped_span.subspan(download_buffer.offset);
gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(),
buffer_info.size);
slot_buffer_downloads.erase(download_info.object_id);
}
}
for (auto& download_buffer : download_map) {
async_buffers_death_ring.emplace_back(download_buffer);
}
runtime.FreeDeferredStagingBuffer(download_map);
committed_downloads.pop_front();
async_buffers.pop_front();
} else {
const std::span<const ImageId> download_ids = committed_downloads.front();
const auto& download_ids = committed_downloads.front();
if (download_ids.empty()) {
committed_downloads.pop_front();
return;
}
size_t total_size_bytes = 0;
for (const ImageId image_id : download_ids) {
total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
for (const PendingDownload& download_info : download_ids) {
if (download_info.is_swizzle) {
total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes;
}
}
auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
const size_t original_offset = download_map.offset;
for (const ImageId image_id : download_ids) {
Image& image = slot_images[image_id];
for (const PendingDownload& download_info : download_ids) {
if (!download_info.is_swizzle) {
continue;
}
Image& image = slot_images[download_info.object_id];
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(download_map, copies);
download_map.offset += image.unswizzled_size_bytes;

@@ -732,8 +767,11 @@ void TextureCache<P>::PopAsyncFlushes() {
runtime.Finish();
download_map.offset = original_offset;
std::span<u8> download_span = download_map.mapped_span;
for (const ImageId image_id : download_ids) {
const ImageBase& image = slot_images[image_id];
for (const PendingDownload& download_info : download_ids) {
if (!download_info.is_swizzle) {
continue;
}
const ImageBase& image = slot_images[download_info.object_id];
const auto copies = FullDownloadCopies(image.info);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
swizzle_data_buffer);

@@ -833,6 +871,33 @@ std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::Dm
return {image, copy};
}

template <class P>
void TextureCache<P>::DownloadImageIntoBuffer(typename TextureCache<P>::Image* image,
typename TextureCache<P>::BufferType buffer,
size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies,
GPUVAddr address, size_t size) {
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
const BufferDownload new_buffer_download{address, size};
auto slot = slot_buffer_downloads.insert(new_buffer_download);
const PendingDownload new_download{false, uncommitted_async_buffers.size(), slot};
uncommitted_downloads.emplace_back(new_download);
auto download_map = runtime.DownloadStagingBuffer(size, true);
uncommitted_async_buffers.emplace_back(download_map);
std::array buffers{
buffer,
download_map.buffer,
};
std::array buffer_offsets{
buffer_offset,
download_map.offset,
};
image->DownloadMemory(buffers, buffer_offsets, copies);
} else {
image->DownloadMemory(buffer, buffer_offset, copies);
}
}

template <class P>
void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
if (False(image.flags & ImageFlagBits::CpuModified)) {

@@ -2209,7 +2274,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
if (new_id) {
const ImageViewBase& old_view = slot_image_views[new_id];
if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
uncommitted_downloads.push_back(old_view.image_id);
const PendingDownload new_download{true, 0, old_view.image_id};
uncommitted_downloads.emplace_back(new_download);
}
}
*old_id = new_id;
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

@@ -40,14 +40,9 @@ struct ChannelState;

namespace VideoCommon {

using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TICEntry;
using Tegra::Texture::TSCEntry;
using VideoCore::Surface::GetFormatType;
using VideoCore::Surface::IsCopyCompatible;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using namespace Common::Literals;

struct ImageViewInOut {

@@ -119,6 +114,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
using Sampler = typename P::Sampler;
using Framebuffer = typename P::Framebuffer;
using AsyncBuffer = typename P::AsyncBuffer;
using BufferType = typename P::BufferType;

struct BlitImages {
ImageId dst_id;

@@ -215,6 +211,10 @@ public:
const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand,
const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image);

void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies,
GPUVAddr address = 0, size_t size = 0);

/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);

@@ -424,17 +424,32 @@ private:
u64 critical_memory;
size_t critical_gc;

struct BufferDownload {
GPUVAddr address;
size_t size;
};

struct PendingDownload {
bool is_swizzle;
size_t async_buffer_id;
SlotId object_id;
};

SlotVector<Image> slot_images;
SlotVector<ImageMapView> slot_map_views;
SlotVector<ImageView> slot_image_views;
SlotVector<ImageAlloc> slot_image_allocs;
SlotVector<Sampler> slot_samplers;
SlotVector<Framebuffer> slot_framebuffers;
SlotVector<BufferDownload> slot_buffer_downloads;

// TODO: This data structure is not optimal and it should be reworked
std::vector<ImageId> uncommitted_downloads;
std::deque<std::vector<ImageId>> committed_downloads;
std::deque<std::optional<AsyncBuffer>> async_buffers;

std::vector<PendingDownload> uncommitted_downloads;
std::deque<std::vector<PendingDownload>> committed_downloads;
std::vector<AsyncBuffer> uncommitted_async_buffers;
std::deque<std::vector<AsyncBuffer>> async_buffers;
std::deque<AsyncBuffer> async_buffers_death_ring;

struct LRUItemParams {
using ObjectType = ImageId;
@@ -145,7 +145,6 @@
FEATURE_NAME(robustness2, robustImageAccess2) \
FEATURE_NAME(shader_demote_to_helper_invocation, shaderDemoteToHelperInvocation) \
FEATURE_NAME(shader_draw_parameters, shaderDrawParameters) \
FEATURE_NAME(timeline_semaphore, timelineSemaphore) \
FEATURE_NAME(variable_pointer, variablePointers) \
FEATURE_NAME(variable_pointer, variablePointersStorageBuffer)

@@ -158,6 +157,7 @@
FEATURE_NAME(provoking_vertex, provokingVertexLast) \
FEATURE_NAME(shader_float16_int8, shaderFloat16) \
FEATURE_NAME(shader_float16_int8, shaderInt8) \
FEATURE_NAME(timeline_semaphore, timelineSemaphore) \
FEATURE_NAME(transform_feedback, transformFeedback) \
FEATURE_NAME(uniform_buffer_standard_layout, uniformBufferStandardLayout) \
FEATURE_NAME(vertex_input_dynamic_state, vertexInputDynamicState)

@@ -493,6 +493,10 @@ public:
return extensions.shader_atomic_int64;
}

bool HasTimelineSemaphore() const {
return features.timeline_semaphore.timelineSemaphore;
}

/// Returns the minimum supported version of SPIR-V.
u32 SupportedSpirvVersion() const {
if (instance_version >= VK_API_VERSION_1_3) {