NvFlinger: Split Buffer Wait from VSync on Async VSync.
This commit splits presentation from waiting. Buffers are now waited for on one thread and presented on a separate thread.
This commit is contained in:
@@ -115,11 +115,36 @@ std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::Ac
|
||||
return *itr;
|
||||
}
|
||||
|
||||
void BufferQueue::ReleaseBuffer(u32 slot) {
|
||||
std::optional<std::reference_wrapper<const BufferQueue::Buffer>>
BufferQueue::ObtainPresentBuffer() {
    // Walk the FIFO of slots that were marked for presentation until one of
    // them resolves to a buffer that is actually in the Presenting state.
    // Slots are consumed from the sequence whether or not they match.
    while (!presenting_sequence.empty()) {
        const u32 slot = presenting_sequence.front();
        presenting_sequence.pop_front();

        const auto found =
            std::find_if(queue.begin(), queue.end(), [slot](const Buffer& buffer) {
                return buffer.status == Buffer::Status::Presenting && buffer.slot == slot;
            });
        if (found != queue.end()) {
            return *found;
        }
    }

    // No buffer is currently pending presentation.
    return {};
}
|
||||
|
||||
void BufferQueue::SetToPresentBuffer(u32 slot) {
    // Locate the buffer occupying the given slot; it must exist and must have
    // been acquired before it can be handed over for presentation.
    const auto found = std::find_if(queue.begin(), queue.end(),
                                    [slot](const Buffer& buffer) { return buffer.slot == slot; });
    ASSERT(found != queue.end());
    ASSERT(found->status == Buffer::Status::Acquired);

    // Transition the buffer and remember its slot so the present path can
    // pick it up in FIFO order.
    found->status = Buffer::Status::Presenting;
    presenting_sequence.push_back(slot);
}
|
||||
|
||||
void BufferQueue::ReleaseBuffer(u32 slot) {
|
||||
auto itr = std::find_if(queue.begin(), queue.end(),
|
||||
[&](const Buffer& buffer) { return buffer.slot == slot; });
|
||||
ASSERT(itr != queue.end());
|
||||
ASSERT(itr->status == Buffer::Status::Presenting || itr->status == Buffer::Status::Acquired);
|
||||
itr->status = Buffer::Status::Free;
|
||||
free_buffers.push_back(slot);
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ public:
|
||||
};
|
||||
|
||||
struct Buffer {
|
||||
enum class Status { Free = 0, Queued = 1, Dequeued = 2, Acquired = 3 };
|
||||
enum class Status { Free = 0, Queued = 1, Dequeued = 2, Acquired = 3, Presenting = 4 };
|
||||
|
||||
u32 slot;
|
||||
Status status = Status::Free;
|
||||
@@ -96,6 +96,8 @@ public:
|
||||
const Common::Rectangle<int>& crop_rect, u32 swap_interval,
|
||||
Service::Nvidia::MultiFence& multi_fence);
|
||||
std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
|
||||
std::optional<std::reference_wrapper<const Buffer>> ObtainPresentBuffer();
|
||||
void SetToPresentBuffer(u32 slot);
|
||||
void ReleaseBuffer(u32 slot);
|
||||
void Disconnect();
|
||||
u32 Query(QueryType type);
|
||||
@@ -115,6 +117,7 @@ private:
|
||||
std::list<u32> free_buffers;
|
||||
std::vector<Buffer> queue;
|
||||
std::list<u32> queue_sequence;
|
||||
std::list<u32> presenting_sequence;
|
||||
Kernel::EventPair buffer_wait_event;
|
||||
};
|
||||
|
||||
|
||||
@@ -35,6 +35,10 @@ void NVFlinger::VSyncThread(NVFlinger& nv_flinger) {
|
||||
nv_flinger.SplitVSync();
|
||||
}
|
||||
|
||||
/// Static trampoline handed to std::thread; forwards to the instance's
/// buffer-wait loop.
void NVFlinger::WaitForBuffersThread(NVFlinger& nv_flinger) {
    nv_flinger.WaitForBuffers();
}
|
||||
|
||||
void NVFlinger::SplitVSync() {
|
||||
system.RegisterHostThread();
|
||||
std::string name = "yuzu:VSyncThread";
|
||||
@@ -43,14 +47,37 @@ void NVFlinger::SplitVSync() {
|
||||
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
|
||||
s64 delay = 0;
|
||||
while (is_running) {
|
||||
guard->lock();
|
||||
const s64 time_start = system.CoreTiming().GetGlobalTimeNs().count();
|
||||
Compose();
|
||||
for (auto& display : displays) {
|
||||
// Trigger vsync for this display at the end of drawing
|
||||
SCOPE_EXIT({ display.SignalVSyncEvent(); });
|
||||
|
||||
// Don't do anything for displays without layers.
|
||||
if (!display.HasLayers())
|
||||
continue;
|
||||
|
||||
// TODO(Subv): Support more than 1 layer.
|
||||
VI::Layer& layer = display.GetLayer(0);
|
||||
auto& buffer_queue = layer.GetBufferQueue();
|
||||
|
||||
guard->lock();
|
||||
// Search for a queued buffer and acquire it
|
||||
auto buffer = buffer_queue.ObtainPresentBuffer();
|
||||
guard->unlock();
|
||||
|
||||
if (!buffer) {
|
||||
continue;
|
||||
}
|
||||
|
||||
MicroProfileFlip();
|
||||
|
||||
swap_interval = buffer->get().swap_interval;
|
||||
buffer_queue.ReleaseBuffer(buffer->get().slot);
|
||||
}
|
||||
const auto ticks = GetNextTicks();
|
||||
const s64 time_end = system.CoreTiming().GetGlobalTimeNs().count();
|
||||
const s64 time_passed = time_end - time_start;
|
||||
const s64 next_time = std::max<s64>(0, ticks - time_passed - delay);
|
||||
guard->unlock();
|
||||
if (next_time > 0) {
|
||||
wait_event->WaitFor(std::chrono::nanoseconds{next_time});
|
||||
}
|
||||
@@ -58,6 +85,63 @@ void NVFlinger::SplitVSync() {
|
||||
}
|
||||
}
|
||||
|
||||
void NVFlinger::WaitForBuffers() {
|
||||
system.RegisterHostThread();
|
||||
std::string name = "yuzu:WaitBufferQueueThread";
|
||||
MicroProfileOnThreadCreate(name.c_str());
|
||||
Common::SetCurrentThreadName(name.c_str());
|
||||
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
|
||||
s64 delay = 0;
|
||||
while (is_running) {
|
||||
for (auto& display : displays) {
|
||||
// Don't do anything for displays without layers.
|
||||
if (!display.HasLayers())
|
||||
continue;
|
||||
|
||||
// TODO(Subv): Support more than 1 layer.
|
||||
VI::Layer& layer = display.GetLayer(0);
|
||||
auto& buffer_queue = layer.GetBufferQueue();
|
||||
|
||||
guard->lock();
|
||||
|
||||
// Search for a queued buffer and acquire it
|
||||
auto buffer = buffer_queue.AcquireBuffer();
|
||||
|
||||
guard->unlock();
|
||||
if (!buffer) {
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto& igbp_buffer = buffer->get().igbp_buffer;
|
||||
// Now send the buffer to the GPU for drawing.
|
||||
// TODO(Subv): Support more than just disp0. The display device selection is probably
|
||||
// based on which display we're drawing (Default, Internal, External, etc)
|
||||
auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0");
|
||||
ASSERT(nvdisp);
|
||||
|
||||
nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format,
|
||||
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
|
||||
buffer->get().transform, buffer->get().crop_rect);
|
||||
|
||||
auto& gpu = system.GPU();
|
||||
const auto& multi_fence = buffer->get().multi_fence;
|
||||
|
||||
for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
|
||||
const auto& fence = multi_fence.fences[fence_id];
|
||||
gpu.WaitFence(fence.id, fence.value);
|
||||
}
|
||||
|
||||
buffer_queue.SetToPresentBuffer(buffer->get().slot);
|
||||
}
|
||||
queue_event->Wait();
|
||||
}
|
||||
}
|
||||
|
||||
/// Wakes the buffer-wait thread after a buffer has been queued for rendering.
void NVFlinger::NotifyQueue() {
    queue_event->Set();
}
|
||||
|
||||
NVFlinger::NVFlinger(Core::System& system) : system(system) {
|
||||
displays.emplace_back(0, "Default", system);
|
||||
displays.emplace_back(1, "External", system);
|
||||
@@ -78,7 +162,9 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) {
|
||||
if (system.IsMulticore()) {
|
||||
is_running = true;
|
||||
wait_event = std::make_unique<Common::Event>();
|
||||
queue_event = std::make_unique<Common::Event>();
|
||||
vsync_thread = std::make_unique<std::thread>(VSyncThread, std::ref(*this));
|
||||
buffer_thread = std::make_unique<std::thread>(WaitForBuffersThread, std::ref(*this));
|
||||
} else {
|
||||
system.CoreTiming().ScheduleEvent(frame_ticks, composition_event);
|
||||
}
|
||||
@@ -88,9 +174,13 @@ NVFlinger::~NVFlinger() {
|
||||
if (system.IsMulticore()) {
|
||||
is_running = false;
|
||||
wait_event->Set();
|
||||
queue_event->Set();
|
||||
vsync_thread->join();
|
||||
buffer_thread->join();
|
||||
vsync_thread.reset();
|
||||
buffer_thread.reset();
|
||||
wait_event.reset();
|
||||
queue_event.reset();
|
||||
} else {
|
||||
system.CoreTiming().UnscheduleEvent(composition_event, 0);
|
||||
}
|
||||
|
||||
@@ -80,6 +80,9 @@ public:
|
||||
/// Obtains a buffer queue identified by the ID.
|
||||
const BufferQueue& FindBufferQueue(u32 id) const;
|
||||
|
||||
/// On queueing buffer for rendering
|
||||
void NotifyQueue();
|
||||
|
||||
/// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
|
||||
/// finished.
|
||||
void Compose();
|
||||
@@ -104,8 +107,10 @@ private:
|
||||
const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const;
|
||||
|
||||
static void VSyncThread(NVFlinger& nv_flinger);
|
||||
static void WaitForBuffersThread(NVFlinger& nv_flinger);
|
||||
|
||||
void SplitVSync();
|
||||
void WaitForBuffers();
|
||||
|
||||
std::shared_ptr<Nvidia::Module> nvdrv;
|
||||
|
||||
@@ -128,7 +133,9 @@ private:
|
||||
Core::System& system;
|
||||
|
||||
std::unique_ptr<std::thread> vsync_thread;
|
||||
std::unique_ptr<std::thread> buffer_thread;
|
||||
std::unique_ptr<Common::Event> wait_event;
|
||||
std::unique_ptr<Common::Event> queue_event;
|
||||
std::atomic<bool> is_running{};
|
||||
};
|
||||
|
||||
|
||||
@@ -580,6 +580,7 @@ private:
|
||||
buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
|
||||
request.data.GetCropRect(), request.data.swap_interval,
|
||||
request.data.multi_fence);
|
||||
nv_flinger->NotifyQueue();
|
||||
|
||||
IGBPQueueBufferResponseParcel response{1280, 720};
|
||||
ctx.WriteBuffer(response.Serialize());
|
||||
|
||||
Reference in New Issue
Block a user