Compare commits
52 Commits
__refs_pul
...
__refs_pul
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fcc5155601 | ||
|
|
e67630b51e | ||
|
|
bd14653417 | ||
|
|
2e89719d3e | ||
|
|
baaafbd5ea | ||
|
|
3476f5b4d3 | ||
|
|
bdf17fe0cc | ||
|
|
54ef9302a2 | ||
|
|
e56a444da9 | ||
|
|
8fe118bcaa | ||
|
|
c56a0e3c34 | ||
|
|
fecffeb0dd | ||
|
|
9608f51cde | ||
|
|
e4ed5bc836 | ||
|
|
de5d431eec | ||
|
|
8da753ab81 | ||
|
|
d923766042 | ||
|
|
a9877c8f65 | ||
|
|
2e7802ad7d | ||
|
|
3a338d9286 | ||
|
|
84b542c386 | ||
|
|
0135b328ed | ||
|
|
a970709d5d | ||
|
|
534abf9d97 | ||
|
|
5224cc49c4 | ||
|
|
b82b093108 | ||
|
|
cf0a7cd1c1 | ||
|
|
424e90f0f5 | ||
|
|
e12a07079e | ||
|
|
fcc5ffdfdd | ||
|
|
4cafc24a4e | ||
|
|
68c44ca0ee | ||
|
|
e858a72a22 | ||
|
|
4db8acd30a | ||
|
|
b8c1dca62f | ||
|
|
0eb39922f6 | ||
|
|
0af7e93763 | ||
|
|
6ff7906ddc | ||
|
|
ce722e317b | ||
|
|
6f6bba3ff1 | ||
|
|
d7298ec262 | ||
|
|
66f4f86a82 | ||
|
|
63a70c253e | ||
|
|
9e74d6238e | ||
|
|
75bba25009 | ||
|
|
7b6519741b | ||
|
|
d6a1a43854 | ||
|
|
eb2633f3ef | ||
|
|
094f6003e0 | ||
|
|
98b940052c | ||
|
|
e5ee0afe6f | ||
|
|
a1fb8a331f |
@@ -188,7 +188,7 @@ if (ENABLE_SDL2)
|
||||
if (YUZU_USE_BUNDLED_SDL2)
|
||||
# Detect toolchain and platform
|
||||
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
|
||||
set(SDL2_VER "SDL2-2.0.5")
|
||||
set(SDL2_VER "SDL2-2.0.8")
|
||||
else()
|
||||
message(FATAL_ERROR "No bundled SDL2 binaries for your toolchain. Disable YUZU_USE_BUNDLED_SDL2 and provide your own.")
|
||||
endif()
|
||||
|
||||
@@ -26,6 +26,18 @@ AudioRenderer::AudioRenderer(AudioRendererParameter params,
|
||||
QueueMixedBuffer(2);
|
||||
}
|
||||
|
||||
u32 AudioRenderer::GetSampleRate() const {
|
||||
return worker_params.sample_rate;
|
||||
}
|
||||
|
||||
u32 AudioRenderer::GetSampleCount() const {
|
||||
return worker_params.sample_count;
|
||||
}
|
||||
|
||||
u32 AudioRenderer::GetMixBufferCount() const {
|
||||
return worker_params.mix_buffer_count;
|
||||
}
|
||||
|
||||
std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) {
|
||||
// Copy UpdateDataHeader struct
|
||||
UpdateDataHeader config{};
|
||||
|
||||
@@ -26,7 +26,7 @@ enum class PlayState : u8 {
|
||||
struct AudioRendererParameter {
|
||||
u32_le sample_rate;
|
||||
u32_le sample_count;
|
||||
u32_le unknown_8;
|
||||
u32_le mix_buffer_count;
|
||||
u32_le unknown_c;
|
||||
u32_le voice_count;
|
||||
u32_le sink_count;
|
||||
@@ -160,6 +160,9 @@ public:
|
||||
std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params);
|
||||
void QueueMixedBuffer(Buffer::Tag tag);
|
||||
void ReleaseAndQueueBuffers();
|
||||
u32 GetSampleRate() const;
|
||||
u32 GetSampleCount() const;
|
||||
u32 GetMixBufferCount() const;
|
||||
|
||||
private:
|
||||
class VoiceState {
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <mutex>
|
||||
|
||||
#include "audio_core/cubeb_sink.h"
|
||||
#include "audio_core/stream.h"
|
||||
@@ -66,6 +67,8 @@ public:
|
||||
return;
|
||||
}
|
||||
|
||||
std::lock_guard lock{queue_mutex};
|
||||
|
||||
queue.reserve(queue.size() + samples.size() * GetNumChannels());
|
||||
|
||||
if (is_6_channel) {
|
||||
@@ -94,6 +97,7 @@ private:
|
||||
u32 num_channels{};
|
||||
bool is_6_channel{};
|
||||
|
||||
std::mutex queue_mutex;
|
||||
std::vector<s16> queue;
|
||||
|
||||
static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
|
||||
@@ -153,6 +157,8 @@ long SinkStreamImpl::DataCallback(cubeb_stream* stream, void* user_data, const v
|
||||
return {};
|
||||
}
|
||||
|
||||
std::lock_guard lock{impl->queue_mutex};
|
||||
|
||||
const size_t frames_to_write{
|
||||
std::min(impl->queue.size() / impl->GetNumChannels(), static_cast<size_t>(num_frames))};
|
||||
|
||||
|
||||
@@ -86,7 +86,16 @@ public:
|
||||
}
|
||||
|
||||
void AddTicks(u64 ticks) override {
|
||||
CoreTiming::AddTicks(ticks - num_interpreted_instructions);
|
||||
// Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
|
||||
// rough approximation of the amount of executed ticks in the system, it may be thrown off
|
||||
// if not all cores are doing a similar amount of work. Instead of doing this, we should
|
||||
// device a way so that timing is consistent across all cores without increasing the ticks 4
|
||||
// times.
|
||||
u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES;
|
||||
// Always execute at least one tick.
|
||||
amortized_ticks = std::max<u64>(amortized_ticks, 1);
|
||||
|
||||
CoreTiming::AddTicks(amortized_ticks);
|
||||
num_interpreted_instructions = 0;
|
||||
}
|
||||
u64 GetTicksRemaining() override {
|
||||
@@ -234,9 +243,7 @@ void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) {
|
||||
}
|
||||
|
||||
void ARM_Dynarmic::PrepareReschedule() {
|
||||
if (jit->IsExecuting()) {
|
||||
jit->HaltExecution();
|
||||
}
|
||||
jit->HaltExecution();
|
||||
}
|
||||
|
||||
void ARM_Dynarmic::ClearInstructionCache() {
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "core/core_timing.h"
|
||||
#include "core/hle/kernel/scheduler.h"
|
||||
#include "core/hle/kernel/thread.h"
|
||||
#include "core/hle/lock.h"
|
||||
#include "core/settings.h"
|
||||
|
||||
namespace Core {
|
||||
@@ -90,6 +91,7 @@ void Cpu::RunLoop(bool tight_loop) {
|
||||
LOG_TRACE(Core, "Core-{} idling", core_index);
|
||||
|
||||
if (IsMainCore()) {
|
||||
// TODO(Subv): Only let CoreTiming idle if all 4 cores are idling.
|
||||
CoreTiming::Idle();
|
||||
CoreTiming::Advance();
|
||||
}
|
||||
@@ -125,6 +127,8 @@ void Cpu::Reschedule() {
|
||||
}
|
||||
|
||||
reschedule_pending = false;
|
||||
// Lock the global kernel mutex when we manipulate the HLE state
|
||||
std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
|
||||
scheduler->Reschedule();
|
||||
}
|
||||
|
||||
|
||||
@@ -79,7 +79,7 @@ private:
|
||||
std::shared_ptr<CpuBarrier> cpu_barrier;
|
||||
std::shared_ptr<Kernel::Scheduler> scheduler;
|
||||
|
||||
bool reschedule_pending{};
|
||||
std::atomic<bool> reschedule_pending = false;
|
||||
size_t core_index;
|
||||
};
|
||||
|
||||
|
||||
@@ -135,11 +135,9 @@ void ClearPendingEvents() {
|
||||
void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
|
||||
ASSERT(event_type != nullptr);
|
||||
s64 timeout = GetTicks() + cycles_into_future;
|
||||
|
||||
// If this event needs to be scheduled before the next advance(), force one early
|
||||
if (!is_global_timer_sane)
|
||||
ForceExceptionCheck(cycles_into_future);
|
||||
|
||||
event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
|
||||
std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
|
||||
}
|
||||
|
||||
@@ -107,19 +107,19 @@ VirtualFile XCI::GetNCAFileByType(NCAContentType type) const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<VfsFile>> XCI::GetFiles() const {
|
||||
std::vector<VirtualFile> XCI::GetFiles() const {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<VfsDirectory>> XCI::GetSubdirectories() const {
|
||||
return std::vector<std::shared_ptr<VfsDirectory>>();
|
||||
std::vector<VirtualDir> XCI::GetSubdirectories() const {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string XCI::GetName() const {
|
||||
return file->GetName();
|
||||
}
|
||||
|
||||
std::shared_ptr<VfsDirectory> XCI::GetParentDirectory() const {
|
||||
VirtualDir XCI::GetParentDirectory() const {
|
||||
return file->GetContainingDirectory();
|
||||
}
|
||||
|
||||
|
||||
@@ -71,13 +71,13 @@ public:
|
||||
std::shared_ptr<NCA> GetNCAByType(NCAContentType type) const;
|
||||
VirtualFile GetNCAFileByType(NCAContentType type) const;
|
||||
|
||||
std::vector<std::shared_ptr<VfsFile>> GetFiles() const override;
|
||||
std::vector<VirtualFile> GetFiles() const override;
|
||||
|
||||
std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override;
|
||||
std::vector<VirtualDir> GetSubdirectories() const override;
|
||||
|
||||
std::string GetName() const override;
|
||||
|
||||
std::shared_ptr<VfsDirectory> GetParentDirectory() const override;
|
||||
VirtualDir GetParentDirectory() const override;
|
||||
|
||||
protected:
|
||||
bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override;
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
#include "core/file_sys/vfs.h"
|
||||
|
||||
namespace Loader {
|
||||
enum class ResultStatus;
|
||||
enum class ResultStatus : u16;
|
||||
}
|
||||
|
||||
namespace FileSys {
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
#include "partition_filesystem.h"
|
||||
|
||||
namespace Loader {
|
||||
enum class ResultStatus;
|
||||
enum class ResultStatus : u16;
|
||||
}
|
||||
|
||||
namespace FileSys {
|
||||
|
||||
@@ -74,15 +74,15 @@ VirtualFile VfsFilesystem::CopyFile(std::string_view old_path_, std::string_view
|
||||
return new_file;
|
||||
}
|
||||
|
||||
VirtualFile VfsFilesystem::MoveFile(std::string_view old_path_, std::string_view new_path_) {
|
||||
const auto old_path = FileUtil::SanitizePath(old_path_);
|
||||
const auto new_path = FileUtil::SanitizePath(new_path_);
|
||||
VirtualFile VfsFilesystem::MoveFile(std::string_view old_path, std::string_view new_path) {
|
||||
const auto sanitized_old_path = FileUtil::SanitizePath(old_path);
|
||||
const auto sanitized_new_path = FileUtil::SanitizePath(new_path);
|
||||
|
||||
// Again, non-default impls are highly encouraged to provide a more optimized version of this.
|
||||
auto out = CopyFile(old_path_, new_path_);
|
||||
auto out = CopyFile(sanitized_old_path, sanitized_new_path);
|
||||
if (out == nullptr)
|
||||
return nullptr;
|
||||
if (DeleteFile(old_path))
|
||||
if (DeleteFile(sanitized_old_path))
|
||||
return out;
|
||||
return nullptr;
|
||||
}
|
||||
@@ -137,15 +137,15 @@ VirtualDir VfsFilesystem::CopyDirectory(std::string_view old_path_, std::string_
|
||||
return new_dir;
|
||||
}
|
||||
|
||||
VirtualDir VfsFilesystem::MoveDirectory(std::string_view old_path_, std::string_view new_path_) {
|
||||
const auto old_path = FileUtil::SanitizePath(old_path_);
|
||||
const auto new_path = FileUtil::SanitizePath(new_path_);
|
||||
VirtualDir VfsFilesystem::MoveDirectory(std::string_view old_path, std::string_view new_path) {
|
||||
const auto sanitized_old_path = FileUtil::SanitizePath(old_path);
|
||||
const auto sanitized_new_path = FileUtil::SanitizePath(new_path);
|
||||
|
||||
// Non-default impls are highly encouraged to provide a more optimized version of this.
|
||||
auto out = CopyDirectory(old_path_, new_path_);
|
||||
auto out = CopyDirectory(sanitized_old_path, sanitized_new_path);
|
||||
if (out == nullptr)
|
||||
return nullptr;
|
||||
if (DeleteDirectory(old_path))
|
||||
if (DeleteDirectory(sanitized_old_path))
|
||||
return out;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -15,9 +15,9 @@
|
||||
|
||||
namespace FileSys {
|
||||
|
||||
struct VfsFilesystem;
|
||||
struct VfsFile;
|
||||
struct VfsDirectory;
|
||||
class VfsDirectory;
|
||||
class VfsFile;
|
||||
class VfsFilesystem;
|
||||
|
||||
// Convenience typedefs to use Vfs* interfaces
|
||||
using VirtualFilesystem = std::shared_ptr<VfsFilesystem>;
|
||||
@@ -34,8 +34,9 @@ enum class VfsEntryType {
|
||||
// A class representing an abstract filesystem. A default implementation given the root VirtualDir
|
||||
// is provided for convenience, but if the Vfs implementation has any additional state or
|
||||
// functionality, they will need to override.
|
||||
struct VfsFilesystem : NonCopyable {
|
||||
VfsFilesystem(VirtualDir root);
|
||||
class VfsFilesystem : NonCopyable {
|
||||
public:
|
||||
explicit VfsFilesystem(VirtualDir root);
|
||||
virtual ~VfsFilesystem();
|
||||
|
||||
// Gets the friendly name for the filesystem.
|
||||
@@ -81,7 +82,8 @@ protected:
|
||||
};
|
||||
|
||||
// A class representing a file in an abstract filesystem.
|
||||
struct VfsFile : NonCopyable {
|
||||
class VfsFile : NonCopyable {
|
||||
public:
|
||||
virtual ~VfsFile();
|
||||
|
||||
// Retrieves the file name.
|
||||
@@ -179,7 +181,8 @@ struct VfsFile : NonCopyable {
|
||||
};
|
||||
|
||||
// A class representing a directory in an abstract filesystem.
|
||||
struct VfsDirectory : NonCopyable {
|
||||
class VfsDirectory : NonCopyable {
|
||||
public:
|
||||
virtual ~VfsDirectory();
|
||||
|
||||
// Retrives the file located at path as if the current directory was root. Returns nullptr if
|
||||
@@ -295,7 +298,8 @@ protected:
|
||||
|
||||
// A convenience partial-implementation of VfsDirectory that stubs out methods that should only work
|
||||
// if writable. This is to avoid redundant empty methods everywhere.
|
||||
struct ReadOnlyVfsDirectory : public VfsDirectory {
|
||||
class ReadOnlyVfsDirectory : public VfsDirectory {
|
||||
public:
|
||||
bool IsWritable() const override;
|
||||
bool IsReadable() const override;
|
||||
std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override;
|
||||
|
||||
@@ -15,7 +15,8 @@ namespace FileSys {
|
||||
// Similar to seeking to an offset.
|
||||
// If the file is writable, operations that would write past the end of the offset file will expand
|
||||
// the size of this wrapper.
|
||||
struct OffsetVfsFile : public VfsFile {
|
||||
class OffsetVfsFile : public VfsFile {
|
||||
public:
|
||||
OffsetVfsFile(std::shared_ptr<VfsFile> file, size_t size, size_t offset = 0,
|
||||
std::string new_name = "", VirtualDir new_parent = nullptr);
|
||||
|
||||
|
||||
@@ -10,7 +10,8 @@ namespace FileSys {
|
||||
|
||||
// An implementation of VfsDirectory that maintains two vectors for subdirectories and files.
|
||||
// Vector data is supplied upon construction.
|
||||
struct VectorVfsDirectory : public VfsDirectory {
|
||||
class VectorVfsDirectory : public VfsDirectory {
|
||||
public:
|
||||
explicit VectorVfsDirectory(std::vector<VirtualFile> files = {},
|
||||
std::vector<VirtualDir> dirs = {}, VirtualDir parent = nullptr,
|
||||
std::string name = "");
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
unsigned int Object::next_object_id;
|
||||
std::atomic<u32> Object::next_object_id{0};
|
||||
|
||||
/// Initialize the kernel
|
||||
void Init() {
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
@@ -42,8 +43,8 @@ public:
|
||||
virtual ~Object();
|
||||
|
||||
/// Returns a unique identifier for the object. For debugging purposes only.
|
||||
unsigned int GetObjectId() const {
|
||||
return object_id;
|
||||
u32 GetObjectId() const {
|
||||
return object_id.load(std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
virtual std::string GetTypeName() const {
|
||||
@@ -61,23 +62,23 @@ public:
|
||||
bool IsWaitable() const;
|
||||
|
||||
public:
|
||||
static unsigned int next_object_id;
|
||||
static std::atomic<u32> next_object_id;
|
||||
|
||||
private:
|
||||
friend void intrusive_ptr_add_ref(Object*);
|
||||
friend void intrusive_ptr_release(Object*);
|
||||
|
||||
unsigned int ref_count = 0;
|
||||
unsigned int object_id = next_object_id++;
|
||||
std::atomic<u32> ref_count{0};
|
||||
std::atomic<u32> object_id{next_object_id++};
|
||||
};
|
||||
|
||||
// Special functions used by boost::instrusive_ptr to do automatic ref-counting
|
||||
inline void intrusive_ptr_add_ref(Object* object) {
|
||||
++object->ref_count;
|
||||
object->ref_count.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
inline void intrusive_ptr_release(Object* object) {
|
||||
if (--object->ref_count == 0) {
|
||||
if (object->ref_count.fetch_sub(1, std::memory_order_acq_rel) == 1) {
|
||||
delete object;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -532,7 +532,6 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
|
||||
CASCADE_RESULT(thread->guest_handle, g_handle_table.Create(thread));
|
||||
*out_handle = thread->guest_handle;
|
||||
|
||||
Core::System::GetInstance().PrepareReschedule();
|
||||
Core::System::GetInstance().CpuCore(thread->processor_id).PrepareReschedule();
|
||||
|
||||
LOG_TRACE(Kernel_SVC,
|
||||
@@ -706,8 +705,7 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
|
||||
Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
|
||||
auto owner = g_handle_table.Get<Thread>(owner_handle);
|
||||
ASSERT(owner);
|
||||
ASSERT(thread->status != ThreadStatus::Running);
|
||||
thread->status = ThreadStatus::WaitMutex;
|
||||
ASSERT(thread->status == ThreadStatus::WaitMutex);
|
||||
thread->wakeup_callback = nullptr;
|
||||
|
||||
owner->AddMutexWaiter(thread);
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "core/hle/kernel/object.h"
|
||||
#include "core/hle/kernel/process.h"
|
||||
#include "core/hle/kernel/thread.h"
|
||||
#include "core/hle/lock.h"
|
||||
#include "core/hle/result.h"
|
||||
#include "core/memory.h"
|
||||
|
||||
@@ -104,6 +105,10 @@ void ExitCurrentThread() {
|
||||
*/
|
||||
static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) {
|
||||
const auto proper_handle = static_cast<Handle>(thread_handle);
|
||||
|
||||
// Lock the global kernel mutex when we enter the kernel HLE.
|
||||
std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
|
||||
|
||||
SharedPtr<Thread> thread = wakeup_callback_handle_table.Get<Thread>(proper_handle);
|
||||
if (thread == nullptr) {
|
||||
LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle);
|
||||
@@ -155,8 +160,10 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
|
||||
if (nanoseconds == -1)
|
||||
return;
|
||||
|
||||
CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(nanoseconds), ThreadWakeupEventType,
|
||||
callback_handle);
|
||||
// This function might be called from any thread so we have to be cautious and use the
|
||||
// thread-safe version of ScheduleEvent.
|
||||
CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds), ThreadWakeupEventType,
|
||||
callback_handle);
|
||||
}
|
||||
|
||||
void Thread::CancelWakeupTimer() {
|
||||
@@ -419,12 +426,33 @@ VAddr Thread::GetCommandBufferAddress() const {
|
||||
}
|
||||
|
||||
void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
|
||||
if (thread->lock_owner == this) {
|
||||
// If the thread is already waiting for this thread to release the mutex, ensure that the
|
||||
// waiters list is consistent and return without doing anything.
|
||||
auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
|
||||
ASSERT(itr != wait_mutex_threads.end());
|
||||
return;
|
||||
}
|
||||
|
||||
// A thread can't wait on two different mutexes at the same time.
|
||||
ASSERT(thread->lock_owner == nullptr);
|
||||
|
||||
// Ensure that the thread is not already in the list of mutex waiters
|
||||
auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
|
||||
ASSERT(itr == wait_mutex_threads.end());
|
||||
|
||||
thread->lock_owner = this;
|
||||
wait_mutex_threads.emplace_back(std::move(thread));
|
||||
UpdatePriority();
|
||||
}
|
||||
|
||||
void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) {
|
||||
ASSERT(thread->lock_owner == this);
|
||||
|
||||
// Ensure that the thread is in the list of mutex waiters
|
||||
auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
|
||||
ASSERT(itr != wait_mutex_threads.end());
|
||||
|
||||
boost::remove_erase(wait_mutex_threads, thread);
|
||||
thread->lock_owner = nullptr;
|
||||
UpdatePriority();
|
||||
|
||||
@@ -20,9 +20,9 @@ public:
|
||||
explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params)
|
||||
: ServiceFramework("IAudioRenderer") {
|
||||
static const FunctionInfo functions[] = {
|
||||
{0, nullptr, "GetAudioRendererSampleRate"},
|
||||
{1, nullptr, "GetAudioRendererSampleCount"},
|
||||
{2, nullptr, "GetAudioRendererMixBufferCount"},
|
||||
{0, &IAudioRenderer::GetAudioRendererSampleRate, "GetAudioRendererSampleRate"},
|
||||
{1, &IAudioRenderer::GetAudioRendererSampleCount, "GetAudioRendererSampleCount"},
|
||||
{2, &IAudioRenderer::GetAudioRendererMixBufferCount, "GetAudioRendererMixBufferCount"},
|
||||
{3, nullptr, "GetAudioRendererState"},
|
||||
{4, &IAudioRenderer::RequestUpdateAudioRenderer, "RequestUpdateAudioRenderer"},
|
||||
{5, &IAudioRenderer::StartAudioRenderer, "StartAudioRenderer"},
|
||||
@@ -45,6 +45,27 @@ private:
|
||||
system_event->Signal();
|
||||
}
|
||||
|
||||
void GetAudioRendererSampleRate(Kernel::HLERequestContext& ctx) {
|
||||
IPC::ResponseBuilder rb{ctx, 3};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.Push<u32>(renderer->GetSampleRate());
|
||||
LOG_DEBUG(Service_Audio, "called");
|
||||
}
|
||||
|
||||
void GetAudioRendererSampleCount(Kernel::HLERequestContext& ctx) {
|
||||
IPC::ResponseBuilder rb{ctx, 3};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.Push<u32>(renderer->GetSampleCount());
|
||||
LOG_DEBUG(Service_Audio, "called");
|
||||
}
|
||||
|
||||
void GetAudioRendererMixBufferCount(Kernel::HLERequestContext& ctx) {
|
||||
IPC::ResponseBuilder rb{ctx, 3};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.Push<u32>(renderer->GetMixBufferCount());
|
||||
LOG_DEBUG(Service_Audio, "called");
|
||||
}
|
||||
|
||||
void RequestUpdateAudioRenderer(Kernel::HLERequestContext& ctx) {
|
||||
ctx.WriteBuffer(renderer->UpdateAudioRenderer(ctx.ReadBuffer()));
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
@@ -169,7 +190,8 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") {
|
||||
{1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
|
||||
{2, &AudRenU::GetAudioDevice, "GetAudioDevice"},
|
||||
{3, nullptr, "OpenAudioRendererAuto"},
|
||||
{4, nullptr, "GetAudioDeviceServiceWithRevisionInfo"},
|
||||
{4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo,
|
||||
"GetAudioDeviceServiceWithRevisionInfo"},
|
||||
};
|
||||
RegisterHandlers(functions);
|
||||
}
|
||||
@@ -189,7 +211,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
|
||||
IPC::RequestParser rp{ctx};
|
||||
auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
|
||||
|
||||
u64 buffer_sz = Common::AlignUp(4 * params.unknown_8, 0x40);
|
||||
u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
|
||||
buffer_sz += params.unknown_c * 1024;
|
||||
buffer_sz += 0x940 * (params.unknown_c + 1);
|
||||
buffer_sz += 0x3F0 * params.voice_count;
|
||||
@@ -197,7 +219,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
|
||||
buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
|
||||
buffer_sz +=
|
||||
Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) *
|
||||
(params.unknown_8 + 6),
|
||||
(params.mix_buffer_count + 6),
|
||||
0x40);
|
||||
|
||||
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
|
||||
@@ -253,6 +275,16 @@ void AudRenU::GetAudioDevice(Kernel::HLERequestContext& ctx) {
|
||||
LOG_DEBUG(Service_Audio, "called");
|
||||
}
|
||||
|
||||
void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
|
||||
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
|
||||
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
rb.PushIpcInterface<Audio::IAudioDevice>();
|
||||
|
||||
LOG_WARNING(Service_Audio, "(STUBBED) called"); // TODO(ogniK): Figure out what is different
|
||||
// based on the current revision
|
||||
}
|
||||
|
||||
bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
|
||||
u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
|
||||
switch (feature) {
|
||||
|
||||
@@ -22,6 +22,7 @@ private:
|
||||
void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
|
||||
void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
|
||||
void GetAudioDevice(Kernel::HLERequestContext& ctx);
|
||||
void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);
|
||||
|
||||
enum class AudioFeatures : u32 {
|
||||
Splitter,
|
||||
|
||||
@@ -291,6 +291,7 @@ private:
|
||||
class Hid final : public ServiceFramework<Hid> {
|
||||
public:
|
||||
Hid() : ServiceFramework("hid") {
|
||||
// clang-format off
|
||||
static const FunctionInfo functions[] = {
|
||||
{0, &Hid::CreateAppletResource, "CreateAppletResource"},
|
||||
{1, &Hid::ActivateDebugPad, "ActivateDebugPad"},
|
||||
@@ -333,15 +334,13 @@ public:
|
||||
{102, &Hid::SetSupportedNpadIdType, "SetSupportedNpadIdType"},
|
||||
{103, &Hid::ActivateNpad, "ActivateNpad"},
|
||||
{104, nullptr, "DeactivateNpad"},
|
||||
{106, &Hid::AcquireNpadStyleSetUpdateEventHandle,
|
||||
"AcquireNpadStyleSetUpdateEventHandle"},
|
||||
{107, nullptr, "DisconnectNpad"},
|
||||
{106, &Hid::AcquireNpadStyleSetUpdateEventHandle, "AcquireNpadStyleSetUpdateEventHandle"},
|
||||
{107, &Hid::DisconnectNpad, "DisconnectNpad"},
|
||||
{108, &Hid::GetPlayerLedPattern, "GetPlayerLedPattern"},
|
||||
{109, nullptr, "ActivateNpadWithRevision"},
|
||||
{120, &Hid::SetNpadJoyHoldType, "SetNpadJoyHoldType"},
|
||||
{121, &Hid::GetNpadJoyHoldType, "GetNpadJoyHoldType"},
|
||||
{122, &Hid::SetNpadJoyAssignmentModeSingleByDefault,
|
||||
"SetNpadJoyAssignmentModeSingleByDefault"},
|
||||
{122, &Hid::SetNpadJoyAssignmentModeSingleByDefault, "SetNpadJoyAssignmentModeSingleByDefault"},
|
||||
{123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"},
|
||||
{124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"},
|
||||
{125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"},
|
||||
@@ -398,6 +397,8 @@ public:
|
||||
{1000, nullptr, "SetNpadCommunicationMode"},
|
||||
{1001, nullptr, "GetNpadCommunicationMode"},
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
RegisterHandlers(functions);
|
||||
|
||||
event = Kernel::Event::Create(Kernel::ResetType::OneShot, "hid:EventHandle");
|
||||
@@ -496,6 +497,12 @@ private:
|
||||
LOG_WARNING(Service_HID, "(STUBBED) called");
|
||||
}
|
||||
|
||||
void DisconnectNpad(Kernel::HLERequestContext& ctx) {
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
LOG_WARNING(Service_HID, "(STUBBED) called");
|
||||
}
|
||||
|
||||
void GetPlayerLedPattern(Kernel::HLERequestContext& ctx) {
|
||||
IPC::ResponseBuilder rb{ctx, 2};
|
||||
rb.Push(RESULT_SUCCESS);
|
||||
|
||||
@@ -23,7 +23,7 @@ class HLERequestContext;
|
||||
} // namespace Kernel
|
||||
|
||||
namespace FileSys {
|
||||
struct VfsFilesystem;
|
||||
class VfsFilesystem;
|
||||
}
|
||||
|
||||
namespace Service {
|
||||
|
||||
@@ -126,7 +126,7 @@ constexpr std::array<const char*, 36> RESULT_MESSAGES{
|
||||
};
|
||||
|
||||
std::string GetMessageForResultStatus(ResultStatus status) {
|
||||
return GetMessageForResultStatus(static_cast<size_t>(status));
|
||||
return GetMessageForResultStatus(static_cast<u16>(status));
|
||||
}
|
||||
|
||||
std::string GetMessageForResultStatus(u16 status) {
|
||||
|
||||
@@ -56,7 +56,7 @@ FileType GuessFromFilename(const std::string& name);
|
||||
std::string GetFileTypeString(FileType type);
|
||||
|
||||
/// Return type for functions in Loader namespace
|
||||
enum class ResultStatus {
|
||||
enum class ResultStatus : u16 {
|
||||
Success,
|
||||
ErrorAlreadyLoaded,
|
||||
ErrorNotImplemented,
|
||||
|
||||
@@ -200,6 +200,14 @@ enum class IMinMaxExchange : u64 {
|
||||
XHi = 3,
|
||||
};
|
||||
|
||||
enum class XmadMode : u64 {
|
||||
None = 0,
|
||||
CLo = 1,
|
||||
CHi = 2,
|
||||
CSfu = 3,
|
||||
CBcc = 4,
|
||||
};
|
||||
|
||||
enum class FlowCondition : u64 {
|
||||
Always = 0xF,
|
||||
Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
|
||||
@@ -456,6 +464,18 @@ union Instruction {
|
||||
}
|
||||
} bra;
|
||||
|
||||
union {
|
||||
BitField<20, 16, u64> imm20_16;
|
||||
BitField<36, 1, u64> product_shift_left;
|
||||
BitField<37, 1, u64> merge_37;
|
||||
BitField<48, 1, u64> sign_a;
|
||||
BitField<49, 1, u64> sign_b;
|
||||
BitField<50, 3, XmadMode> mode;
|
||||
BitField<52, 1, u64> high_b;
|
||||
BitField<53, 1, u64> high_a;
|
||||
BitField<56, 1, u64> merge_56;
|
||||
} xmad;
|
||||
|
||||
union {
|
||||
BitField<20, 14, u64> offset;
|
||||
BitField<34, 5, u64> index;
|
||||
@@ -593,6 +613,7 @@ public:
|
||||
IntegerSetPredicate,
|
||||
PredicateSetPredicate,
|
||||
Conversion,
|
||||
Xmad,
|
||||
Unknown,
|
||||
};
|
||||
|
||||
@@ -782,10 +803,10 @@ private:
|
||||
INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
|
||||
INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
|
||||
INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
|
||||
INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"),
|
||||
INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"),
|
||||
INST("010100010-------", Id::XMAD_RC, Type::Arithmetic, "XMAD_RC"),
|
||||
INST("0101101100------", Id::XMAD_RR, Type::Arithmetic, "XMAD_RR"),
|
||||
INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
|
||||
INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
|
||||
INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
|
||||
INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"),
|
||||
};
|
||||
#undef INST
|
||||
std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
|
||||
|
||||
@@ -46,6 +46,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
|
||||
case RenderTargetFormat::RGBA32_FLOAT:
|
||||
case RenderTargetFormat::RGBA32_UINT:
|
||||
return 16;
|
||||
case RenderTargetFormat::RGBA16_UINT:
|
||||
case RenderTargetFormat::RGBA16_FLOAT:
|
||||
case RenderTargetFormat::RG32_FLOAT:
|
||||
return 8;
|
||||
@@ -67,6 +68,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
|
||||
case RenderTargetFormat::R16_UINT:
|
||||
case RenderTargetFormat::R16_SINT:
|
||||
case RenderTargetFormat::R16_FLOAT:
|
||||
case RenderTargetFormat::RG8_UNORM:
|
||||
case RenderTargetFormat::RG8_SNORM:
|
||||
return 2;
|
||||
case RenderTargetFormat::R8_UNORM:
|
||||
|
||||
@@ -20,6 +20,7 @@ enum class RenderTargetFormat : u32 {
|
||||
NONE = 0x0,
|
||||
RGBA32_FLOAT = 0xC0,
|
||||
RGBA32_UINT = 0xC2,
|
||||
RGBA16_UINT = 0xC9,
|
||||
RGBA16_FLOAT = 0xCA,
|
||||
RG32_FLOAT = 0xCB,
|
||||
BGRA8_UNORM = 0xCF,
|
||||
@@ -35,6 +36,7 @@ enum class RenderTargetFormat : u32 {
|
||||
R11G11B10_FLOAT = 0xE0,
|
||||
R32_FLOAT = 0xE5,
|
||||
B5G6R5_UNORM = 0xE8,
|
||||
RG8_UNORM = 0xEA,
|
||||
RG8_SNORM = 0xEB,
|
||||
R16_UNORM = 0xEE,
|
||||
R16_SNORM = 0xEF,
|
||||
|
||||
@@ -36,30 +36,21 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
|
||||
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
|
||||
|
||||
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_window{window} {
|
||||
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window)
|
||||
: emu_window{window}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) {
|
||||
// Create sampler objects
|
||||
for (size_t i = 0; i < texture_samplers.size(); ++i) {
|
||||
texture_samplers[i].Create();
|
||||
state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
|
||||
}
|
||||
|
||||
// Create SSBOs
|
||||
for (size_t stage = 0; stage < ssbos.size(); ++stage) {
|
||||
for (size_t buffer = 0; buffer < ssbos[stage].size(); ++buffer) {
|
||||
ssbos[stage][buffer].Create();
|
||||
state.draw.const_buffers[stage][buffer].ssbo = ssbos[stage][buffer].handle;
|
||||
}
|
||||
}
|
||||
|
||||
GLint ext_num;
|
||||
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num);
|
||||
for (GLint i = 0; i < ext_num; i++) {
|
||||
const std::string_view extension{
|
||||
reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))};
|
||||
|
||||
if (extension == "GL_ARB_buffer_storage") {
|
||||
has_ARB_buffer_storage = true;
|
||||
} else if (extension == "GL_ARB_direct_state_access") {
|
||||
if (extension == "GL_ARB_direct_state_access") {
|
||||
has_ARB_direct_state_access = true;
|
||||
} else if (extension == "GL_ARB_separate_shader_objects") {
|
||||
has_ARB_separate_shader_objects = true;
|
||||
@@ -86,47 +77,31 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind
|
||||
|
||||
hw_vao.Create();
|
||||
|
||||
stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER);
|
||||
stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
|
||||
state.draw.vertex_buffer = stream_buffer->GetHandle();
|
||||
state.draw.vertex_buffer = stream_buffer.GetHandle();
|
||||
|
||||
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
|
||||
state.draw.shader_program = 0;
|
||||
state.draw.vertex_array = hw_vao.handle;
|
||||
state.Apply();
|
||||
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle());
|
||||
|
||||
for (unsigned index = 0; index < uniform_buffers.size(); ++index) {
|
||||
auto& buffer = uniform_buffers[index];
|
||||
buffer.Create();
|
||||
glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle);
|
||||
glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr,
|
||||
GL_STREAM_COPY);
|
||||
glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle);
|
||||
}
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle());
|
||||
|
||||
glEnable(GL_BLEND);
|
||||
|
||||
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
|
||||
|
||||
LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
|
||||
}
|
||||
|
||||
RasterizerOpenGL::~RasterizerOpenGL() {
|
||||
if (stream_buffer != nullptr) {
|
||||
state.draw.vertex_buffer = stream_buffer->GetHandle();
|
||||
state.Apply();
|
||||
stream_buffer->Release();
|
||||
}
|
||||
}
|
||||
/// Destroys the rasterizer. No explicit teardown is performed here.
RasterizerOpenGL::~RasterizerOpenGL() = default;
|
||||
|
||||
std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
|
||||
GLintptr buffer_offset) {
|
||||
MICROPROFILE_SCOPE(OpenGL_VAO);
|
||||
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
|
||||
const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
|
||||
|
||||
state.draw.vertex_array = hw_vao.handle;
|
||||
state.draw.vertex_buffer = stream_buffer->GetHandle();
|
||||
state.draw.vertex_buffer = stream_buffer.GetHandle();
|
||||
state.Apply();
|
||||
|
||||
// Upload all guest vertex arrays sequentially to our buffer
|
||||
@@ -141,16 +116,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
|
||||
ASSERT(end > start);
|
||||
u64 size = end - start + 1;
|
||||
|
||||
// Copy vertex array data
|
||||
Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size);
|
||||
GLintptr vertex_buffer_offset;
|
||||
std::tie(array_ptr, buffer_offset, vertex_buffer_offset) =
|
||||
UploadMemory(array_ptr, buffer_offset, start, size);
|
||||
|
||||
// Bind the vertex array to the buffer at the current offset.
|
||||
glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride);
|
||||
glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset,
|
||||
vertex_array.stride);
|
||||
|
||||
ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented");
|
||||
|
||||
array_ptr += size;
|
||||
buffer_offset += size;
|
||||
}
|
||||
|
||||
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
|
||||
@@ -201,22 +175,12 @@ static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program
|
||||
return program_code;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
|
||||
// Helper function for uploading uniform data
|
||||
const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
|
||||
if (has_ARB_direct_state_access) {
|
||||
glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size);
|
||||
} else {
|
||||
glBindBuffer(GL_COPY_WRITE_BUFFER, handle);
|
||||
glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size);
|
||||
}
|
||||
};
|
||||
|
||||
std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
|
||||
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
||||
|
||||
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
|
||||
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
|
||||
u32 current_constbuffer_bindpoint = static_cast<u32>(uniform_buffers.size());
|
||||
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
|
||||
u32 current_texture_bindpoint = 0;
|
||||
|
||||
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
@@ -228,22 +192,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::tie(buffer_ptr, buffer_offset) =
|
||||
AlignBuffer(buffer_ptr, buffer_offset, static_cast<size_t>(uniform_buffer_alignment));
|
||||
|
||||
const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
|
||||
|
||||
GLShader::MaxwellUniformData ubo{};
|
||||
ubo.SetFromRegs(gpu.state.shader_stages[stage]);
|
||||
std::memcpy(buffer_ptr, &ubo, sizeof(ubo));
|
||||
|
||||
// Flush the buffer so that the GPU can see the data we just wrote.
|
||||
glFlushMappedBufferRange(GL_ARRAY_BUFFER, buffer_offset, sizeof(ubo));
|
||||
// Bind the buffer
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, stage, stream_buffer.GetHandle(), buffer_offset,
|
||||
sizeof(ubo));
|
||||
|
||||
// Upload uniform data as one UBO per stage
|
||||
const GLintptr ubo_offset = buffer_offset;
|
||||
copy_buffer(uniform_buffers[stage].handle, ubo_offset,
|
||||
sizeof(GLShader::MaxwellUniformData));
|
||||
|
||||
buffer_ptr += sizeof(GLShader::MaxwellUniformData);
|
||||
buffer_offset += sizeof(GLShader::MaxwellUniformData);
|
||||
buffer_ptr += sizeof(ubo);
|
||||
buffer_offset += sizeof(ubo);
|
||||
|
||||
GLShader::ShaderSetup setup{GetShaderProgramCode(program)};
|
||||
GLShader::ShaderEntries shader_resources;
|
||||
@@ -282,9 +245,9 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
|
||||
static_cast<Maxwell::ShaderStage>(stage));
|
||||
|
||||
// Configure the const buffers for this shader stage.
|
||||
current_constbuffer_bindpoint =
|
||||
SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
|
||||
current_constbuffer_bindpoint, shader_resources.const_buffer_entries);
|
||||
std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = SetupConstBuffers(
|
||||
buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
|
||||
current_constbuffer_bindpoint, shader_resources.const_buffer_entries);
|
||||
|
||||
// Configure the textures for this shader stage.
|
||||
current_texture_bindpoint =
|
||||
@@ -299,6 +262,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
|
||||
}
|
||||
|
||||
shader_program_manager->UseTrivialGeometryShader();
|
||||
|
||||
return {buffer_ptr, buffer_offset};
|
||||
}
|
||||
|
||||
size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
|
||||
@@ -432,6 +397,31 @@ void RasterizerOpenGL::Clear() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Rounds a buffer offset up to a multiple of `alignment` and advances the write pointer by the
 * same number of padding bytes, so that pointer and offset keep referring to the same position.
 * Only the offset is aligned; the address held in buffer_ptr itself is not inspected.
 *
 * @param buffer_ptr    Current write pointer.
 * @param buffer_offset Offset that corresponds to buffer_ptr.
 * @param alignment     Required alignment of the offset, in bytes.
 * @returns The advanced pointer together with the aligned offset.
 */
std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset,
                                                       size_t alignment) {
    const auto unaligned = static_cast<size_t>(buffer_offset);
    const auto aligned = static_cast<GLintptr>(Common::AlignUp(unaligned, alignment));

    // Number of bytes skipped to reach the aligned offset.
    const GLintptr padding = aligned - buffer_offset;

    return {buffer_ptr + padding, aligned};
}
|
||||
|
||||
/**
 * Copies `size` bytes of guest memory, addressed by a GPU virtual address, to the current write
 * position after aligning that position with AlignBuffer.
 *
 * @param buffer_ptr    Current write pointer.
 * @param buffer_offset Offset that corresponds to buffer_ptr.
 * @param gpu_addr      GPU virtual address of the data to copy.
 * @param size          Number of bytes to copy.
 * @param alignment     Alignment applied to the offset before the copy.
 * @returns A tuple of: the pointer just past the copied data, the offset just past the copied
 *          data, and the (aligned) offset at which the copied data begins.
 */
std::tuple<u8*, GLintptr, GLintptr> RasterizerOpenGL::UploadMemory(u8* buffer_ptr,
                                                                   GLintptr buffer_offset,
                                                                   Tegra::GPUVAddr gpu_addr,
                                                                   size_t size, size_t alignment) {
    std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment);
    // Remember where the data starts; this is what callers bind against.
    GLintptr uploaded_offset = buffer_offset;

    const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
    const boost::optional<VAddr> cpu_addr{memory_manager->GpuToCpuAddress(gpu_addr)};

    // The translation result is optional; dereferencing an empty optional is undefined
    // behaviour, so fail loudly instead of reading from an indeterminate address.
    ASSERT_MSG(cpu_addr.is_initialized(), "GPU address has no CPU mapping");
    Memory::ReadBlock(*cpu_addr, buffer_ptr, size);

    buffer_ptr += size;
    buffer_offset += size;

    return {buffer_ptr, buffer_offset, uploaded_offset};
}
|
||||
|
||||
void RasterizerOpenGL::DrawArrays() {
|
||||
if (accelerate_draw == AccelDraw::Disabled)
|
||||
return;
|
||||
@@ -456,7 +446,7 @@ void RasterizerOpenGL::DrawArrays() {
|
||||
const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
|
||||
const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count};
|
||||
|
||||
state.draw.vertex_buffer = stream_buffer->GetHandle();
|
||||
state.draw.vertex_buffer = stream_buffer.GetHandle();
|
||||
state.Apply();
|
||||
|
||||
size_t buffer_size = CalculateVertexArraysSize();
|
||||
@@ -466,41 +456,31 @@ void RasterizerOpenGL::DrawArrays() {
|
||||
}
|
||||
|
||||
// Uniform space for the 5 shader stages
|
||||
buffer_size = Common::AlignUp<size_t>(buffer_size, 4) +
|
||||
sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage;
|
||||
buffer_size =
|
||||
Common::AlignUp<size_t>(buffer_size, 4) +
|
||||
(sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage;
|
||||
|
||||
// Add space for at least 18 constant buffers
|
||||
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
|
||||
|
||||
u8* buffer_ptr;
|
||||
GLintptr buffer_offset;
|
||||
std::tie(buffer_ptr, buffer_offset) =
|
||||
stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4);
|
||||
std::tie(buffer_ptr, buffer_offset, std::ignore) =
|
||||
stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4);
|
||||
u8* buffer_ptr_base = buffer_ptr;
|
||||
|
||||
u8* offseted_buffer;
|
||||
std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset);
|
||||
|
||||
offseted_buffer =
|
||||
reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4));
|
||||
buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);
|
||||
std::tie(buffer_ptr, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset);
|
||||
|
||||
// If indexed mode, copy the index buffer
|
||||
GLintptr index_buffer_offset = 0;
|
||||
if (is_indexed) {
|
||||
const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
|
||||
const boost::optional<VAddr> index_data_addr{
|
||||
memory_manager->GpuToCpuAddress(regs.index_array.StartAddress())};
|
||||
Memory::ReadBlock(*index_data_addr, offseted_buffer, index_buffer_size);
|
||||
|
||||
index_buffer_offset = buffer_offset;
|
||||
offseted_buffer += index_buffer_size;
|
||||
buffer_offset += index_buffer_size;
|
||||
std::tie(buffer_ptr, buffer_offset, index_buffer_offset) = UploadMemory(
|
||||
buffer_ptr, buffer_offset, regs.index_array.StartAddress(), index_buffer_size);
|
||||
}
|
||||
|
||||
offseted_buffer =
|
||||
reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4));
|
||||
buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);
|
||||
std::tie(buffer_ptr, buffer_offset) = SetupShaders(buffer_ptr, buffer_offset);
|
||||
|
||||
SetupShaders(offseted_buffer, buffer_offset);
|
||||
|
||||
stream_buffer->Unmap();
|
||||
stream_buffer.Unmap(buffer_ptr - buffer_ptr_base);
|
||||
|
||||
shader_program_manager->ApplyTo(state);
|
||||
state.Apply();
|
||||
@@ -647,36 +627,23 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
|
||||
}
|
||||
}
|
||||
|
||||
u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint program,
|
||||
u32 current_bindpoint,
|
||||
const std::vector<GLShader::ConstBufferEntry>& entries) {
|
||||
std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(
|
||||
u8* buffer_ptr, GLintptr buffer_offset, Maxwell::ShaderStage stage, GLuint program,
|
||||
u32 current_bindpoint, const std::vector<GLShader::ConstBufferEntry>& entries) {
|
||||
const auto& gpu = Core::System::GetInstance().GPU();
|
||||
const auto& maxwell3d = gpu.Maxwell3D();
|
||||
|
||||
// Reset all buffer draw state for this stage.
|
||||
for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) {
|
||||
buffer.bindpoint = 0;
|
||||
buffer.enabled = false;
|
||||
}
|
||||
|
||||
// Upload only the enabled buffers from the 16 constbuffers of each shader stage
|
||||
const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)];
|
||||
|
||||
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
|
||||
const auto& used_buffer = entries[bindpoint];
|
||||
const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
|
||||
auto& buffer_draw_state =
|
||||
state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()];
|
||||
|
||||
if (!buffer.enabled) {
|
||||
continue;
|
||||
}
|
||||
|
||||
buffer_draw_state.enabled = true;
|
||||
buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
|
||||
|
||||
boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address);
|
||||
|
||||
size_t size = 0;
|
||||
|
||||
if (used_buffer.IsIndirect()) {
|
||||
@@ -698,25 +665,26 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
|
||||
size = Common::AlignUp(size, sizeof(GLvec4));
|
||||
ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
|
||||
|
||||
std::vector<u8> data(size);
|
||||
Memory::ReadBlock(*addr, data.data(), data.size());
|
||||
GLintptr const_buffer_offset;
|
||||
std::tie(buffer_ptr, buffer_offset, const_buffer_offset) =
|
||||
UploadMemory(buffer_ptr, buffer_offset, buffer.address, size,
|
||||
static_cast<size_t>(uniform_buffer_alignment));
|
||||
|
||||
glBindBuffer(GL_UNIFORM_BUFFER, buffer_draw_state.ssbo);
|
||||
glBufferData(GL_UNIFORM_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW);
|
||||
glBindBuffer(GL_UNIFORM_BUFFER, 0);
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint,
|
||||
stream_buffer.GetHandle(), const_buffer_offset, size);
|
||||
|
||||
// Now configure the bindpoint of the buffer inside the shader
|
||||
const std::string buffer_name = used_buffer.GetName();
|
||||
const GLuint index =
|
||||
glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str());
|
||||
if (index != GL_INVALID_INDEX) {
|
||||
glUniformBlockBinding(program, index, buffer_draw_state.bindpoint);
|
||||
glUniformBlockBinding(program, index, current_bindpoint + bindpoint);
|
||||
}
|
||||
}
|
||||
|
||||
state.Apply();
|
||||
|
||||
return current_bindpoint + static_cast<u32>(entries.size());
|
||||
return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())};
|
||||
}
|
||||
|
||||
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit,
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <glad/glad.h>
|
||||
@@ -100,9 +101,10 @@ private:
|
||||
* @param entries Vector describing the buffers that are actually used in the guest shader.
|
||||
* @returns The next available bindpoint for use in the next shader stage.
|
||||
*/
|
||||
u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program,
|
||||
u32 current_bindpoint,
|
||||
const std::vector<GLShader::ConstBufferEntry>& entries);
|
||||
std::tuple<u8*, GLintptr, u32> SetupConstBuffers(
|
||||
u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
|
||||
GLuint program, u32 current_bindpoint,
|
||||
const std::vector<GLShader::ConstBufferEntry>& entries);
|
||||
|
||||
/*
|
||||
* Configures the current textures to use for the draw command.
|
||||
@@ -139,7 +141,6 @@ private:
|
||||
/// Syncs the blend state to match the guest state
|
||||
void SyncBlendState();
|
||||
|
||||
bool has_ARB_buffer_storage = false;
|
||||
bool has_ARB_direct_state_access = false;
|
||||
bool has_ARB_separate_shader_objects = false;
|
||||
bool has_ARB_vertex_attrib_binding = false;
|
||||
@@ -155,22 +156,24 @@ private:
|
||||
OGLVertexArray hw_vao;
|
||||
|
||||
std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
|
||||
std::array<std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers>,
|
||||
Tegra::Engines::Maxwell3D::Regs::MaxShaderStage>
|
||||
ssbos;
|
||||
|
||||
static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||
std::unique_ptr<OGLStreamBuffer> stream_buffer;
|
||||
OGLStreamBuffer stream_buffer;
|
||||
OGLBuffer uniform_buffer;
|
||||
OGLFramebuffer framebuffer;
|
||||
GLint uniform_buffer_alignment;
|
||||
|
||||
size_t CalculateVertexArraysSize() const;
|
||||
|
||||
std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset);
|
||||
|
||||
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers;
|
||||
std::pair<u8*, GLintptr> SetupShaders(u8* buffer_ptr, GLintptr buffer_offset);
|
||||
|
||||
void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset);
|
||||
std::pair<u8*, GLintptr> AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, size_t alignment);
|
||||
|
||||
std::tuple<u8*, GLintptr, GLintptr> UploadMemory(u8* buffer_ptr, GLintptr buffer_offset,
|
||||
Tegra::GPUVAddr gpu_addr, size_t size,
|
||||
size_t alignment = 4);
|
||||
|
||||
enum class AccelDraw { Disabled, Arrays, Indexed };
|
||||
AccelDraw accelerate_draw = AccelDraw::Disabled;
|
||||
|
||||
@@ -101,6 +101,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
|
||||
{GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8
|
||||
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI
|
||||
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
|
||||
{GL_RGBA16UI, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI
|
||||
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
|
||||
false}, // R11FG11FB10F
|
||||
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
|
||||
@@ -134,6 +135,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
|
||||
{GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
|
||||
{GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
|
||||
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8
|
||||
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
|
||||
{GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
|
||||
|
||||
// DepthStencil formats
|
||||
@@ -234,32 +236,57 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu
|
||||
static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
|
||||
SurfaceParams::MaxPixelFormat>
|
||||
morton_to_gl_fns = {
|
||||
MortonCopy<true, PixelFormat::ABGR8U>, MortonCopy<true, PixelFormat::ABGR8S>,
|
||||
MortonCopy<true, PixelFormat::B5G6R5>, MortonCopy<true, PixelFormat::A2B10G10R10>,
|
||||
MortonCopy<true, PixelFormat::A1B5G5R5>, MortonCopy<true, PixelFormat::R8>,
|
||||
MortonCopy<true, PixelFormat::R8UI>, MortonCopy<true, PixelFormat::RGBA16F>,
|
||||
MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>,
|
||||
MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
|
||||
MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
|
||||
MortonCopy<true, PixelFormat::DXN2UNORM>, MortonCopy<true, PixelFormat::DXN2SNORM>,
|
||||
MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
|
||||
MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>,
|
||||
MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>,
|
||||
MortonCopy<true, PixelFormat::R32F>, MortonCopy<true, PixelFormat::R16F>,
|
||||
MortonCopy<true, PixelFormat::R16UNORM>, MortonCopy<true, PixelFormat::R16S>,
|
||||
MortonCopy<true, PixelFormat::R16UI>, MortonCopy<true, PixelFormat::R16I>,
|
||||
MortonCopy<true, PixelFormat::RG16>, MortonCopy<true, PixelFormat::RG16F>,
|
||||
MortonCopy<true, PixelFormat::RG16UI>, MortonCopy<true, PixelFormat::RG16I>,
|
||||
MortonCopy<true, PixelFormat::RG16S>, MortonCopy<true, PixelFormat::RGB32F>,
|
||||
MortonCopy<true, PixelFormat::SRGBA8>, MortonCopy<true, PixelFormat::RG8S>,
|
||||
MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>,
|
||||
MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>,
|
||||
// clang-format off
|
||||
MortonCopy<true, PixelFormat::ABGR8U>,
|
||||
MortonCopy<true, PixelFormat::ABGR8S>,
|
||||
MortonCopy<true, PixelFormat::B5G6R5>,
|
||||
MortonCopy<true, PixelFormat::A2B10G10R10>,
|
||||
MortonCopy<true, PixelFormat::A1B5G5R5>,
|
||||
MortonCopy<true, PixelFormat::R8>,
|
||||
MortonCopy<true, PixelFormat::R8UI>,
|
||||
MortonCopy<true, PixelFormat::RGBA16F>,
|
||||
MortonCopy<true, PixelFormat::RGBA16UI>,
|
||||
MortonCopy<true, PixelFormat::R11FG11FB10F>,
|
||||
MortonCopy<true, PixelFormat::RGBA32UI>,
|
||||
MortonCopy<true, PixelFormat::DXT1>,
|
||||
MortonCopy<true, PixelFormat::DXT23>,
|
||||
MortonCopy<true, PixelFormat::DXT45>,
|
||||
MortonCopy<true, PixelFormat::DXN1>,
|
||||
MortonCopy<true, PixelFormat::DXN2UNORM>,
|
||||
MortonCopy<true, PixelFormat::DXN2SNORM>,
|
||||
MortonCopy<true, PixelFormat::BC7U>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
|
||||
MortonCopy<true, PixelFormat::G8R8>,
|
||||
MortonCopy<true, PixelFormat::BGRA8>,
|
||||
MortonCopy<true, PixelFormat::RGBA32F>,
|
||||
MortonCopy<true, PixelFormat::RG32F>,
|
||||
MortonCopy<true, PixelFormat::R32F>,
|
||||
MortonCopy<true, PixelFormat::R16F>,
|
||||
MortonCopy<true, PixelFormat::R16UNORM>,
|
||||
MortonCopy<true, PixelFormat::R16S>,
|
||||
MortonCopy<true, PixelFormat::R16UI>,
|
||||
MortonCopy<true, PixelFormat::R16I>,
|
||||
MortonCopy<true, PixelFormat::RG16>,
|
||||
MortonCopy<true, PixelFormat::RG16F>,
|
||||
MortonCopy<true, PixelFormat::RG16UI>,
|
||||
MortonCopy<true, PixelFormat::RG16I>,
|
||||
MortonCopy<true, PixelFormat::RG16S>,
|
||||
MortonCopy<true, PixelFormat::RGB32F>,
|
||||
MortonCopy<true, PixelFormat::SRGBA8>,
|
||||
MortonCopy<true, PixelFormat::RG8U>,
|
||||
MortonCopy<true, PixelFormat::RG8S>,
|
||||
MortonCopy<true, PixelFormat::Z24S8>,
|
||||
MortonCopy<true, PixelFormat::S8Z24>,
|
||||
MortonCopy<true, PixelFormat::Z32F>,
|
||||
MortonCopy<true, PixelFormat::Z16>,
|
||||
MortonCopy<true, PixelFormat::Z32FS8>,
|
||||
// clang-format on
|
||||
};
|
||||
|
||||
static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
|
||||
SurfaceParams::MaxPixelFormat>
|
||||
gl_to_morton_fns = {
|
||||
// clang-format off
|
||||
MortonCopy<false, PixelFormat::ABGR8U>,
|
||||
MortonCopy<false, PixelFormat::ABGR8S>,
|
||||
MortonCopy<false, PixelFormat::B5G6R5>,
|
||||
@@ -268,6 +295,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
|
||||
MortonCopy<false, PixelFormat::R8>,
|
||||
MortonCopy<false, PixelFormat::R8UI>,
|
||||
MortonCopy<false, PixelFormat::RGBA16F>,
|
||||
MortonCopy<false, PixelFormat::RGBA16UI>,
|
||||
MortonCopy<false, PixelFormat::R11FG11FB10F>,
|
||||
MortonCopy<false, PixelFormat::RGBA32UI>,
|
||||
// TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not
|
||||
@@ -297,12 +325,14 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
|
||||
MortonCopy<false, PixelFormat::RG16S>,
|
||||
MortonCopy<false, PixelFormat::RGB32F>,
|
||||
MortonCopy<false, PixelFormat::SRGBA8>,
|
||||
MortonCopy<false, PixelFormat::RG8U>,
|
||||
MortonCopy<false, PixelFormat::RG8S>,
|
||||
MortonCopy<false, PixelFormat::Z24S8>,
|
||||
MortonCopy<false, PixelFormat::S8Z24>,
|
||||
MortonCopy<false, PixelFormat::Z32F>,
|
||||
MortonCopy<false, PixelFormat::Z16>,
|
||||
MortonCopy<false, PixelFormat::Z32FS8>,
|
||||
// clang-format on
|
||||
};
|
||||
|
||||
// Allocate an uninitialized texture of appropriate size and format for the surface
|
||||
|
||||
@@ -31,43 +31,45 @@ struct SurfaceParams {
|
||||
R8 = 5,
|
||||
R8UI = 6,
|
||||
RGBA16F = 7,
|
||||
R11FG11FB10F = 8,
|
||||
RGBA32UI = 9,
|
||||
DXT1 = 10,
|
||||
DXT23 = 11,
|
||||
DXT45 = 12,
|
||||
DXN1 = 13, // This is also known as BC4
|
||||
DXN2UNORM = 14,
|
||||
DXN2SNORM = 15,
|
||||
BC7U = 16,
|
||||
ASTC_2D_4X4 = 17,
|
||||
G8R8 = 18,
|
||||
BGRA8 = 19,
|
||||
RGBA32F = 20,
|
||||
RG32F = 21,
|
||||
R32F = 22,
|
||||
R16F = 23,
|
||||
R16UNORM = 24,
|
||||
R16S = 25,
|
||||
R16UI = 26,
|
||||
R16I = 27,
|
||||
RG16 = 28,
|
||||
RG16F = 29,
|
||||
RG16UI = 30,
|
||||
RG16I = 31,
|
||||
RG16S = 32,
|
||||
RGB32F = 33,
|
||||
SRGBA8 = 34,
|
||||
RG8S = 35,
|
||||
RGBA16UI = 8,
|
||||
R11FG11FB10F = 9,
|
||||
RGBA32UI = 10,
|
||||
DXT1 = 11,
|
||||
DXT23 = 12,
|
||||
DXT45 = 13,
|
||||
DXN1 = 14, // This is also known as BC4
|
||||
DXN2UNORM = 15,
|
||||
DXN2SNORM = 16,
|
||||
BC7U = 17,
|
||||
ASTC_2D_4X4 = 18,
|
||||
G8R8 = 19,
|
||||
BGRA8 = 20,
|
||||
RGBA32F = 21,
|
||||
RG32F = 22,
|
||||
R32F = 23,
|
||||
R16F = 24,
|
||||
R16UNORM = 25,
|
||||
R16S = 26,
|
||||
R16UI = 27,
|
||||
R16I = 28,
|
||||
RG16 = 29,
|
||||
RG16F = 30,
|
||||
RG16UI = 31,
|
||||
RG16I = 32,
|
||||
RG16S = 33,
|
||||
RGB32F = 34,
|
||||
SRGBA8 = 35,
|
||||
RG8U = 36,
|
||||
RG8S = 37,
|
||||
|
||||
MaxColorFormat,
|
||||
|
||||
// DepthStencil formats
|
||||
Z24S8 = 36,
|
||||
S8Z24 = 37,
|
||||
Z32F = 38,
|
||||
Z16 = 39,
|
||||
Z32FS8 = 40,
|
||||
Z24S8 = 38,
|
||||
S8Z24 = 39,
|
||||
Z32F = 40,
|
||||
Z16 = 41,
|
||||
Z32FS8 = 42,
|
||||
|
||||
MaxDepthStencilFormat,
|
||||
|
||||
@@ -113,6 +115,7 @@ struct SurfaceParams {
|
||||
1, // R8
|
||||
1, // R8UI
|
||||
1, // RGBA16F
|
||||
1, // RGBA16UI
|
||||
1, // R11FG11FB10F
|
||||
1, // RGBA32UI
|
||||
4, // DXT1
|
||||
@@ -140,6 +143,7 @@ struct SurfaceParams {
|
||||
1, // RG16S
|
||||
1, // RGB32F
|
||||
1, // SRGBA8
|
||||
1, // RG8U
|
||||
1, // RG8S
|
||||
1, // Z24S8
|
||||
1, // S8Z24
|
||||
@@ -165,6 +169,7 @@ struct SurfaceParams {
|
||||
8, // R8
|
||||
8, // R8UI
|
||||
64, // RGBA16F
|
||||
64, // RGBA16UI
|
||||
32, // R11FG11FB10F
|
||||
128, // RGBA32UI
|
||||
64, // DXT1
|
||||
@@ -192,6 +197,7 @@ struct SurfaceParams {
|
||||
32, // RG16S
|
||||
96, // RGB32F
|
||||
32, // SRGBA8
|
||||
16, // RG8U
|
||||
16, // RG8S
|
||||
32, // Z24S8
|
||||
32, // S8Z24
|
||||
@@ -241,6 +247,8 @@ struct SurfaceParams {
|
||||
return PixelFormat::A2B10G10R10;
|
||||
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
|
||||
return PixelFormat::RGBA16F;
|
||||
case Tegra::RenderTargetFormat::RGBA16_UINT:
|
||||
return PixelFormat::RGBA16UI;
|
||||
case Tegra::RenderTargetFormat::RGBA32_FLOAT:
|
||||
return PixelFormat::RGBA32F;
|
||||
case Tegra::RenderTargetFormat::RG32_FLOAT:
|
||||
@@ -265,6 +273,8 @@ struct SurfaceParams {
|
||||
return PixelFormat::RG16;
|
||||
case Tegra::RenderTargetFormat::RG16_SNORM:
|
||||
return PixelFormat::RG16S;
|
||||
case Tegra::RenderTargetFormat::RG8_UNORM:
|
||||
return PixelFormat::RG8U;
|
||||
case Tegra::RenderTargetFormat::RG8_SNORM:
|
||||
return PixelFormat::RG8S;
|
||||
case Tegra::RenderTargetFormat::R16_FLOAT:
|
||||
@@ -432,6 +442,7 @@ struct SurfaceParams {
|
||||
case Tegra::RenderTargetFormat::RG16_UNORM:
|
||||
case Tegra::RenderTargetFormat::R16_UNORM:
|
||||
case Tegra::RenderTargetFormat::B5G6R5_UNORM:
|
||||
case Tegra::RenderTargetFormat::RG8_UNORM:
|
||||
return ComponentType::UNorm;
|
||||
case Tegra::RenderTargetFormat::RGBA8_SNORM:
|
||||
case Tegra::RenderTargetFormat::RG16_SNORM:
|
||||
@@ -447,6 +458,7 @@ struct SurfaceParams {
|
||||
case Tegra::RenderTargetFormat::R32_FLOAT:
|
||||
return ComponentType::Float;
|
||||
case Tegra::RenderTargetFormat::RGBA32_UINT:
|
||||
case Tegra::RenderTargetFormat::RGBA16_UINT:
|
||||
case Tegra::RenderTargetFormat::RG16_UINT:
|
||||
case Tegra::RenderTargetFormat::R8_UINT:
|
||||
case Tegra::RenderTargetFormat::R16_UINT:
|
||||
|
||||
@@ -376,6 +376,8 @@ public:
|
||||
return value;
|
||||
} else if (type == GLSLRegister::Type::Integer) {
|
||||
return "floatBitsToInt(" + value + ')';
|
||||
} else if (type == GLSLRegister::Type::UnsignedInteger) {
|
||||
return "floatBitsToUint(" + value + ')';
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
@@ -1630,6 +1632,99 @@ private:
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Type::Xmad: {
|
||||
ASSERT_MSG(!instr.xmad.sign_a, "Unimplemented");
|
||||
ASSERT_MSG(!instr.xmad.sign_b, "Unimplemented");
|
||||
|
||||
std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)};
|
||||
std::string op_b;
|
||||
std::string op_c;
|
||||
|
||||
// TODO(bunnei): Needs to be fixed once op_a or op_b is signed
|
||||
ASSERT_MSG(instr.xmad.sign_a == instr.xmad.sign_b, "Unimplemented");
|
||||
const bool is_signed{instr.xmad.sign_a == 1};
|
||||
|
||||
bool is_merge{};
|
||||
switch (opcode->GetId()) {
|
||||
case OpCode::Id::XMAD_CR: {
|
||||
is_merge = instr.xmad.merge_56;
|
||||
op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
|
||||
instr.xmad.sign_b ? GLSLRegister::Type::Integer
|
||||
: GLSLRegister::Type::UnsignedInteger);
|
||||
op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::XMAD_RR: {
|
||||
is_merge = instr.xmad.merge_37;
|
||||
op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.xmad.sign_b);
|
||||
op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::XMAD_RC: {
|
||||
op_b += regs.GetRegisterAsInteger(instr.gpr39, 0, instr.xmad.sign_b);
|
||||
op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
|
||||
is_signed ? GLSLRegister::Type::Integer
|
||||
: GLSLRegister::Type::UnsignedInteger);
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::XMAD_IMM: {
|
||||
is_merge = instr.xmad.merge_37;
|
||||
op_b += std::to_string(instr.xmad.imm20_16);
|
||||
op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
LOG_CRITICAL(HW_GPU, "Unhandled XMAD instruction: {}", opcode->GetName());
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(bunnei): Ensure this is right with signed operands
|
||||
if (instr.xmad.high_a) {
|
||||
op_a = "((" + op_a + ") >> 16)";
|
||||
} else {
|
||||
op_a = "((" + op_a + ") & 0xFFFF)";
|
||||
}
|
||||
|
||||
std::string src2 = '(' + op_b + ')'; // Preserve original source 2
|
||||
if (instr.xmad.high_b) {
|
||||
op_b = '(' + src2 + " >> 16)";
|
||||
} else {
|
||||
op_b = '(' + src2 + " & 0xFFFF)";
|
||||
}
|
||||
|
||||
std::string product = '(' + op_a + " * " + op_b + ')';
|
||||
if (instr.xmad.product_shift_left) {
|
||||
product = '(' + product + " << 16)";
|
||||
}
|
||||
|
||||
switch (instr.xmad.mode) {
|
||||
case Tegra::Shader::XmadMode::None:
|
||||
break;
|
||||
case Tegra::Shader::XmadMode::CLo:
|
||||
op_c = "((" + op_c + ") & 0xFFFF)";
|
||||
break;
|
||||
case Tegra::Shader::XmadMode::CHi:
|
||||
op_c = "((" + op_c + ") >> 16)";
|
||||
break;
|
||||
case Tegra::Shader::XmadMode::CBcc:
|
||||
op_c = "((" + op_c + ") + (" + src2 + "<< 16))";
|
||||
break;
|
||||
default: {
|
||||
LOG_CRITICAL(HW_GPU, "Unhandled XMAD mode: {}",
|
||||
static_cast<u32>(instr.xmad.mode.Value()));
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
std::string sum{'(' + product + " + " + op_c + ')'};
|
||||
if (is_merge) {
|
||||
sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))";
|
||||
}
|
||||
|
||||
regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
switch (opcode->GetId()) {
|
||||
case OpCode::Id::EXIT: {
|
||||
|
||||
@@ -203,21 +203,6 @@ void OpenGLState::Apply() const {
|
||||
}
|
||||
}
|
||||
|
||||
// Constbuffers
|
||||
for (std::size_t stage = 0; stage < draw.const_buffers.size(); ++stage) {
|
||||
for (std::size_t buffer_id = 0; buffer_id < draw.const_buffers[stage].size(); ++buffer_id) {
|
||||
const auto& current = cur_state.draw.const_buffers[stage][buffer_id];
|
||||
const auto& new_state = draw.const_buffers[stage][buffer_id];
|
||||
|
||||
if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint ||
|
||||
current.ssbo != new_state.ssbo) {
|
||||
if (new_state.enabled) {
|
||||
glBindBufferBase(GL_UNIFORM_BUFFER, new_state.bindpoint, new_state.ssbo);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Framebuffer
|
||||
if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
|
||||
|
||||
@@ -119,12 +119,6 @@ public:
|
||||
GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
|
||||
GLuint shader_program; // GL_CURRENT_PROGRAM
|
||||
GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
|
||||
struct ConstBufferConfig {
|
||||
bool enabled = false;
|
||||
GLuint bindpoint;
|
||||
GLuint ssbo;
|
||||
};
|
||||
std::array<std::array<ConstBufferConfig, Regs::MaxConstBuffers>, 5> const_buffers;
|
||||
} draw;
|
||||
|
||||
struct {
|
||||
|
||||
@@ -9,174 +9,91 @@
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
|
||||
class OrphanBuffer : public OGLStreamBuffer {
|
||||
public:
|
||||
explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {}
|
||||
~OrphanBuffer() override;
|
||||
OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent)
|
||||
: gl_target(target), buffer_size(size) {
|
||||
gl_buffer.Create();
|
||||
glBindBuffer(gl_target, gl_buffer.handle);
|
||||
|
||||
private:
|
||||
void Create(size_t size, size_t sync_subdivide) override;
|
||||
void Release() override;
|
||||
GLsizeiptr allocate_size = size;
|
||||
if (target == GL_ARRAY_BUFFER) {
|
||||
// On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
|
||||
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
|
||||
// probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
|
||||
// vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
|
||||
// crash.
|
||||
allocate_size *= 2;
|
||||
}
|
||||
|
||||
std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override;
|
||||
void Unmap() override;
|
||||
if (GLAD_GL_ARB_buffer_storage) {
|
||||
persistent = true;
|
||||
coherent = prefer_coherent;
|
||||
GLbitfield flags =
|
||||
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
|
||||
glBufferStorage(gl_target, allocate_size, nullptr, flags);
|
||||
mapped_ptr = static_cast<u8*>(glMapBufferRange(
|
||||
gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
|
||||
} else {
|
||||
glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<u8> data;
|
||||
};
|
||||
|
||||
class StorageBuffer : public OGLStreamBuffer {
|
||||
public:
|
||||
explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {}
|
||||
~StorageBuffer() override;
|
||||
|
||||
private:
|
||||
void Create(size_t size, size_t sync_subdivide) override;
|
||||
void Release() override;
|
||||
|
||||
std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override;
|
||||
void Unmap() override;
|
||||
|
||||
struct Fence {
|
||||
OGLSync sync;
|
||||
size_t offset;
|
||||
};
|
||||
std::deque<Fence> head;
|
||||
std::deque<Fence> tail;
|
||||
|
||||
u8* mapped_ptr;
|
||||
};
|
||||
|
||||
OGLStreamBuffer::OGLStreamBuffer(GLenum target) {
|
||||
gl_target = target;
|
||||
OGLStreamBuffer::~OGLStreamBuffer() {
|
||||
if (persistent) {
|
||||
glBindBuffer(gl_target, gl_buffer.handle);
|
||||
glUnmapBuffer(gl_target);
|
||||
}
|
||||
gl_buffer.Release();
|
||||
}
|
||||
|
||||
GLuint OGLStreamBuffer::GetHandle() const {
|
||||
return gl_buffer.handle;
|
||||
}
|
||||
|
||||
std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) {
|
||||
if (storage_buffer) {
|
||||
return std::make_unique<StorageBuffer>(target);
|
||||
}
|
||||
return std::make_unique<OrphanBuffer>(target);
|
||||
GLsizeiptr OGLStreamBuffer::GetSize() const {
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
OrphanBuffer::~OrphanBuffer() {
|
||||
Release();
|
||||
}
|
||||
|
||||
void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) {
|
||||
buffer_pos = 0;
|
||||
buffer_size = size;
|
||||
data.resize(buffer_size);
|
||||
|
||||
if (gl_buffer.handle == 0) {
|
||||
gl_buffer.Create();
|
||||
glBindBuffer(gl_target, gl_buffer.handle);
|
||||
}
|
||||
|
||||
glBufferData(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, GL_STREAM_DRAW);
|
||||
}
|
||||
|
||||
void OrphanBuffer::Release() {
|
||||
gl_buffer.Release();
|
||||
}
|
||||
|
||||
std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) {
|
||||
buffer_pos = Common::AlignUp(buffer_pos, alignment);
|
||||
|
||||
if (buffer_pos + size > buffer_size) {
|
||||
Create(std::max(buffer_size, size), 0);
|
||||
}
|
||||
|
||||
mapped_size = size;
|
||||
return std::make_pair(&data[buffer_pos], static_cast<GLintptr>(buffer_pos));
|
||||
}
|
||||
|
||||
void OrphanBuffer::Unmap() {
|
||||
glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos),
|
||||
static_cast<GLsizeiptr>(mapped_size), &data[buffer_pos]);
|
||||
buffer_pos += mapped_size;
|
||||
}
|
||||
|
||||
StorageBuffer::~StorageBuffer() {
|
||||
Release();
|
||||
}
|
||||
|
||||
void StorageBuffer::Create(size_t size, size_t sync_subdivide) {
|
||||
if (gl_buffer.handle != 0)
|
||||
return;
|
||||
|
||||
buffer_pos = 0;
|
||||
buffer_size = size;
|
||||
buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1);
|
||||
|
||||
gl_buffer.Create();
|
||||
glBindBuffer(gl_target, gl_buffer.handle);
|
||||
|
||||
glBufferStorage(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr,
|
||||
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
|
||||
mapped_ptr = reinterpret_cast<u8*>(
|
||||
glMapBufferRange(gl_target, 0, static_cast<GLsizeiptr>(buffer_size),
|
||||
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
|
||||
}
|
||||
|
||||
void StorageBuffer::Release() {
|
||||
if (gl_buffer.handle == 0)
|
||||
return;
|
||||
|
||||
glUnmapBuffer(gl_target);
|
||||
|
||||
gl_buffer.Release();
|
||||
head.clear();
|
||||
tail.clear();
|
||||
}
|
||||
|
||||
std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) {
|
||||
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
||||
ASSERT(size <= buffer_size);
|
||||
|
||||
OGLSync sync;
|
||||
|
||||
buffer_pos = Common::AlignUp(buffer_pos, alignment);
|
||||
size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide);
|
||||
|
||||
if (!head.empty() &&
|
||||
(effective_offset > head.back().offset || buffer_pos + size > buffer_size)) {
|
||||
ASSERT(head.back().sync.handle == 0);
|
||||
head.back().sync.Create();
|
||||
}
|
||||
|
||||
if (buffer_pos + size > buffer_size) {
|
||||
if (!tail.empty()) {
|
||||
std::swap(sync, tail.back().sync);
|
||||
tail.clear();
|
||||
}
|
||||
std::swap(tail, head);
|
||||
buffer_pos = 0;
|
||||
effective_offset = 0;
|
||||
}
|
||||
|
||||
while (!tail.empty() && buffer_pos + size > tail.front().offset) {
|
||||
std::swap(sync, tail.front().sync);
|
||||
tail.pop_front();
|
||||
}
|
||||
|
||||
if (sync.handle != 0) {
|
||||
glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
|
||||
sync.Release();
|
||||
}
|
||||
|
||||
if (head.empty() || effective_offset > head.back().offset) {
|
||||
head.emplace_back();
|
||||
head.back().offset = effective_offset;
|
||||
}
|
||||
|
||||
ASSERT(alignment <= buffer_size);
|
||||
mapped_size = size;
|
||||
return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos));
|
||||
|
||||
if (alignment > 0) {
|
||||
buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment);
|
||||
}
|
||||
|
||||
bool invalidate = false;
|
||||
if (buffer_pos + size > buffer_size) {
|
||||
buffer_pos = 0;
|
||||
invalidate = true;
|
||||
|
||||
if (persistent) {
|
||||
glUnmapBuffer(gl_target);
|
||||
}
|
||||
}
|
||||
|
||||
if (invalidate | !persistent) {
|
||||
GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
|
||||
(coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
|
||||
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
|
||||
mapped_ptr = static_cast<u8*>(
|
||||
glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
|
||||
mapped_offset = buffer_pos;
|
||||
}
|
||||
|
||||
return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
|
||||
}
|
||||
|
||||
void StorageBuffer::Unmap() {
|
||||
glFlushMappedBufferRange(gl_target, static_cast<GLintptr>(buffer_pos),
|
||||
static_cast<GLsizeiptr>(mapped_size));
|
||||
buffer_pos += mapped_size;
|
||||
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
|
||||
ASSERT(size <= mapped_size);
|
||||
|
||||
if (!coherent && size > 0) {
|
||||
glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
|
||||
}
|
||||
|
||||
if (!persistent) {
|
||||
glUnmapBuffer(gl_target);
|
||||
}
|
||||
|
||||
buffer_pos += size;
|
||||
}
|
||||
|
||||
@@ -2,35 +2,41 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <glad/glad.h>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
class OGLStreamBuffer : private NonCopyable {
|
||||
public:
|
||||
explicit OGLStreamBuffer(GLenum target);
|
||||
virtual ~OGLStreamBuffer() = default;
|
||||
|
||||
public:
|
||||
static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target);
|
||||
|
||||
virtual void Create(size_t size, size_t sync_subdivide) = 0;
|
||||
virtual void Release() {}
|
||||
explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false);
|
||||
~OGLStreamBuffer();
|
||||
|
||||
GLuint GetHandle() const;
|
||||
GLsizeiptr GetSize() const;
|
||||
|
||||
virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0;
|
||||
virtual void Unmap() = 0;
|
||||
/*
|
||||
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
|
||||
* and the optional alignment requirement.
|
||||
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
|
||||
* The return values are the pointer to the new chunk, the offset within the buffer,
|
||||
* and the invalidation flag for previous chunks.
|
||||
* The actual used size must be specified on unmapping the chunk.
|
||||
*/
|
||||
std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
|
||||
|
||||
protected:
|
||||
void Unmap(GLsizeiptr size);
|
||||
|
||||
private:
|
||||
OGLBuffer gl_buffer;
|
||||
GLenum gl_target;
|
||||
|
||||
size_t buffer_pos = 0;
|
||||
size_t buffer_size = 0;
|
||||
size_t buffer_sync_subdivide = 0;
|
||||
size_t mapped_size = 0;
|
||||
bool coherent = false;
|
||||
bool persistent = false;
|
||||
|
||||
GLintptr buffer_pos = 0;
|
||||
GLsizeiptr buffer_size = 0;
|
||||
GLintptr mapped_offset = 0;
|
||||
GLsizeiptr mapped_size = 0;
|
||||
u8* mapped_ptr = nullptr;
|
||||
};
|
||||
|
||||
@@ -91,6 +91,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
|
||||
switch (topology) {
|
||||
case Maxwell::PrimitiveTopology::Points:
|
||||
return GL_POINTS;
|
||||
case Maxwell::PrimitiveTopology::LineStrip:
|
||||
return GL_LINE_STRIP;
|
||||
case Maxwell::PrimitiveTopology::Triangles:
|
||||
return GL_TRIANGLES;
|
||||
case Maxwell::PrimitiveTopology::TriangleStrip:
|
||||
|
||||
Reference in New Issue
Block a user