Compare commits

...

79 Commits

Author SHA1 Message Date
ReaperOfSouls
bfb35a8fee Revert "Revert "Yield types"" 2018-11-25 01:03:54 -04:00
ReaperOfSouls
362919eaca Merge pull request #32 from ReaperOfSouls1909/revert-30-yield-types
Revert "Yield types"
2018-11-25 01:03:52 -04:00
ReaperOfSouls
ddc3a76809 Revert "Yield types" 2018-11-25 01:03:44 -04:00
ReaperOfSouls
2531ee5809 Merge pull request #31 from ReaperOfSouls1909/revert-29-map-physical-memory
Revert "Map physical memory"
2018-11-25 01:03:28 -04:00
ReaperOfSouls
ca998e9c0d Revert "Map physical memory" 2018-11-25 01:01:31 -04:00
ReaperOfSouls
dc248dc35d Merge pull request #30 from DarkLordZach/yield-types
Yield types
2018-11-25 00:58:39 -04:00
ReaperOfSouls
f0ec00a9cd Add files via upload 2018-11-25 00:58:29 -04:00
ReaperOfSouls
317e407e47 Delete svc.cpp 2018-11-25 00:55:35 -04:00
ReaperOfSouls
a6aa9b5db4 Merge pull request #29 from ogniK5377/map-physical-memory
Map physical memory
2018-11-24 20:54:47 -04:00
ReaperOfSouls
63f11c1a83 Merge pull request #28 from bunnei/sleepthread
svc: Improve SleepThread for yield types.
2018-11-24 20:46:01 -04:00
ReaperOfSouls
dcedf48591 Merge pull request #27 from yuzu-emu/master
Merge master
2018-11-24 20:45:26 -04:00
ReaperOfSouls
9e2d8336ba Merge pull request #26 from ReaperOfSouls1909/revert-1-dc
Revert "Dc"
2018-11-24 20:45:03 -04:00
ReaperOfSouls
515027236f Revert "Dc" 2018-11-24 20:44:53 -04:00
ReaperOfSouls
81133f35e0 Merge pull request #25 from ReaperOfSouls1909/revert-2-core-mgr
Revert "core: Relocate CPU core management to its own class"
2018-11-24 20:44:06 -04:00
ReaperOfSouls
2fb716e6d4 Revert "core: Relocate CPU core management to its own class" 2018-11-24 20:43:56 -04:00
ReaperOfSouls
1fdd669c64 Merge pull request #24 from ReaperOfSouls1909/revert-7-shader_cache
Revert "shader_cache: Only lock covered instructions."
2018-11-24 20:43:35 -04:00
ReaperOfSouls
4ef96a5ea1 Revert "shader_cache: Only lock covered instructions." 2018-11-24 20:43:26 -04:00
ReaperOfSouls
4f62c27d05 Merge pull request #23 from ReaperOfSouls1909/revert-11-bfi
Revert "gl_shader_decompiler: Implement BFI_IMM_R"
2018-11-24 20:42:50 -04:00
ReaperOfSouls
0c1524936b Revert "gl_shader_decompiler: Implement BFI_IMM_R" 2018-11-24 20:42:41 -04:00
ReaperOfSouls
156a41b8cc Merge pull request #22 from ReaperOfSouls1909/revert-13-r2p
Revert "gl_shader_decompiler: Implement R2P_IMM"
2018-11-24 20:42:12 -04:00
ReaperOfSouls
e1be6bb2a5 Revert "gl_shader_decompiler: Implement R2P_IMM" 2018-11-24 20:42:00 -04:00
ReaperOfSouls
dcd78037ac Merge pull request #21 from ReaperOfSouls1909/revert-14-clip-distances
Revert "gl_shader_decompiler: Implement clip distances"
2018-11-24 20:41:42 -04:00
ReaperOfSouls
63d5e92a92 Revert "gl_shader_decompiler: Implement clip distances" 2018-11-24 20:41:32 -04:00
ReaperOfSouls
30b45751ea Merge pull request #20 from ReaperOfSouls1909/revert-10-pred-comp-11
Revert "Pred comp 11"
2018-11-24 20:40:49 -04:00
ReaperOfSouls
50237cb9ed Revert "Pred comp 11" 2018-11-24 20:40:40 -04:00
ReaperOfSouls
2fde25ed4d Merge pull request #19 from ReaperOfSouls1909/revert-12-getgputime
Revert "nvhost_ctrl_gpu: Implement IoctlGetGpuTime."
2018-11-24 20:39:47 -04:00
ReaperOfSouls
2e4955632e Revert "nvhost_ctrl_gpu: Implement IoctlGetGpuTime." 2018-11-24 20:39:38 -04:00
ReaperOfSouls
ee4d1cc92f Merge pull request #18 from ReaperOfSouls1909/revert-4-fix-txq
Revert "Properly Implemented TXQ Instruction"
2018-11-24 20:39:12 -04:00
ReaperOfSouls
0248933ef3 Revert "Properly Implemented TXQ Instruction" 2018-11-24 20:39:02 -04:00
ReaperOfSouls
46c2c936a1 Merge pull request #17 from ReaperOfSouls1909/revert-5-master
Revert "Add support for clear_flags register"
2018-11-24 20:37:54 -04:00
ReaperOfSouls
276efa7bae Revert "Add support for clear_flags register" 2018-11-24 20:37:44 -04:00
ReaperOfSouls
7a3f0b8154 Merge pull request #16 from Tinob/DepthClamp
Implement depth clamp
2018-11-23 11:48:05 -04:00
ReaperOfSouls
cab0eba6d4 Merge pull request #15 from ReaperOfSouls1909/revert-8-master
Revert "Polyfix"
2018-11-23 11:47:20 -04:00
ReaperOfSouls
34a5a4e4aa Revert "Polyfix" 2018-11-23 11:47:12 -04:00
Rodolfo Bogado
35bb416308 Implement depth clamp 2018-11-23 12:11:21 -03:00
ReaperOfSouls
ec7345a1e8 Merge pull request #8 from marcosvitali/master
Polyfix
2018-11-23 10:58:01 -04:00
ReaperOfSouls
f328d25933 Merge pull request #14 from ReinUsesLisp/clip-distances
gl_shader_decompiler: Implement clip distances
2018-11-23 10:57:09 -04:00
ReaperOfSouls
dd9fc99901 Merge pull request #13 from ReinUsesLisp/r2p
gl_shader_decompiler: Implement R2P_IMM
2018-11-23 10:56:09 -04:00
ReaperOfSouls
aa5e70b410 Merge pull request #12 from bunnei/getgputime
nvhost_ctrl_gpu: Implement IoctlGetGpuTime.
2018-11-23 10:55:48 -04:00
ReaperOfSouls
930f76f31e Merge pull request #11 from ReinUsesLisp/bfi
gl_shader_decompiler: Implement BFI_IMM_R
2018-11-23 10:55:26 -04:00
ReaperOfSouls
cb00001882 Merge pull request #10 from Hexagon12/pred-comp-11
Pred comp 11
2018-11-23 10:54:49 -04:00
ReaperOfSouls
9b51d92d3a Merge pull request #9 from FernandoS27/tex-spacing
Implemented Tile Width Spacing
2018-11-23 10:53:52 -04:00
Marcos Vitali
cd94dc484a Initialize polygon_offset in the constructor. 2018-11-23 10:38:06 -03:00
ReaperOfSouls
d557f4abea Merge pull request #7 from degasus/shader_cache
shader_cache: Only lock covered instructions.
2018-11-23 09:18:35 -04:00
ReaperOfSouls
f0b51bf713 Merge pull request #6 from bunnei/ldg
Ldg
2018-11-23 09:14:13 -04:00
Marcos Vitali
ddeb7809f7 Clang Format fixes. 2018-11-23 10:10:48 -03:00
ReaperOfSouls
07009688c8 Merge pull request #5 from Tinob/master
Add support for clear_flags register
2018-11-23 08:59:59 -04:00
ReaperOfSouls
c30f30da50 Merge pull request #4 from FernandoS27/fix-txq
Properly Implemented TXQ Instruction
2018-11-23 08:58:42 -04:00
ReaperOfSouls
dffc128448 Merge pull request #3 from FernandoS27/ignore-assert-dev
Implement better Ignore Assert
2018-11-23 08:57:51 -04:00
ReaperOfSouls
5cbade867f Merge pull request #2 from lioncash/core-mgr
core: Relocate CPU core management to its own class
2018-11-23 08:57:34 -04:00
ReaperOfSouls
cde484e32a Merge pull request #1 from FernandoS27/dc
Dc
2018-11-23 08:57:15 -04:00
FernandoS27
e012b3e1fe Fix Texture Overlapping 2018-11-23 08:56:41 -04:00
Rodolfo Bogado
de8001cfcd Add support for clear_flags register 2018-11-23 09:39:18 -03:00
Marcos Vitali
33ba10591e GPU States: Implement Polygon Offset. This is used in SMO all the time. 2018-11-23 03:01:33 -03:00
ReinUsesLisp
b3853403b7 gl_shader_decompiler: Implement clip distances 2018-11-23 02:14:43 -03:00
FernandoS27
0c465ca9c9 Fix TEXS Instruction encodings 2018-11-22 22:51:25 -04:00
FernandoS27
0eeda68d19 Fix one encoding in TEX Instruction 2018-11-22 22:08:19 -04:00
FernandoS27
1f895d68d1 Corrected inputs indexing in TEX instruction 2018-11-22 22:08:19 -04:00
Zach Hilman
820d81b9a5 scheduler: Add explanations for YieldWith and WithoutLoadBalancing 2018-11-22 00:33:53 -05:00
ReinUsesLisp
642dfeda2a gl_shader_decompiler: Implement BFI_IMM_R 2018-11-21 16:12:30 -03:00
ReinUsesLisp
d92afc7493 gl_shader_decompiler: Implement R2P_IMM 2018-11-21 04:56:00 -03:00
Hexagon12
76de2d0656 Clang fix 2018-11-20 18:57:44 +02:00
Hexagon12
9fbe79320b oops 2018-11-19 21:36:49 +02:00
Hexagon12
ae6e074f4a Added predicate comparison LessEqualWithNan 2018-11-19 21:27:51 +02:00
FernandoS27
fab4934f03 Implemented Tile Width Spacing 2018-11-19 14:09:42 -04:00
Zach Hilman
409dcf0e0a svc: Implement yield types 0 and -1 2018-11-18 23:44:19 -05:00
David Marcec
e6d205ec25 Updated SetMemoryPermission to new api 2018-11-17 15:22:17 +11:00
David Marcec
9327033580 Merge branch 'master' of https://github.com/yuzu-emu/yuzu into map-physical-memory 2018-11-17 15:17:31 +11:00
David Marcec
e840aa610a Added svc error checking 2018-11-16 00:21:23 +11:00
David Marcec
0d284a52bb Final fixups 2018-11-16 00:08:23 +11:00
David Marcec
d68162a7c9 Initial implementation of svcMapPhysicalMemory 2018-11-15 14:57:28 +11:00
bunnei
eadf2c070a gl_global_cache: Ensure buffer size does not exceed UBO maximum.
- Fixes crash with Xenoblade Chronicles 2.
2018-11-12 19:15:09 -05:00
bunnei
0c3eb8e318 gl_global_cache: Optimize caching to eliminate unnecessary resource management. 2018-11-12 19:15:09 -05:00
bunnei
13edd9ee68 gl_rasterizer: Cache global region uniform locations and refactor. 2018-11-12 19:15:09 -05:00
bunnei
ba1c8d935c gl_global_cache: Use const reference for GetGlobalRegion argument. 2018-11-12 19:15:09 -05:00
Zach Hilman
e84b02a351 gl_rasterizer: Add caching for global memory regions 2018-11-12 19:15:08 -05:00
Zach Hilman
2b18ce1248 Preliminary implementation of LDG
Works by approximating the value of the final address using the last IADD_C operation and then reading 16kb following that address. Currently a hackeuristic.
2018-11-12 18:28:42 -05:00
bunnei
5583fe1377 svc: Improve SleepThread for yield types.
- Fixes Super Mario Party.
2018-10-23 19:06:08 -04:00
FernandoS27
babd3581ee Implement better Ignore Assert 2018-09-17 14:14:48 -04:00
26 changed files with 718 additions and 99 deletions

View File

@@ -28,18 +28,14 @@ __declspec(noinline, noreturn)
}
#define ASSERT(_a_) \
do \
if (!(_a_)) { \
assert_noinline_call([] { LOG_CRITICAL(Debug, "Assertion Failed!"); }); \
} \
while (0)
if (!(_a_)) { \
LOG_CRITICAL(Debug, "Assertion Failed!"); \
}
#define ASSERT_MSG(_a_, ...) \
do \
if (!(_a_)) { \
assert_noinline_call([&] { LOG_CRITICAL(Debug, "Assertion Failed!\n" __VA_ARGS__); }); \
} \
while (0)
if (!(_a_)) { \
LOG_CRITICAL(Debug, "Assertion Failed! " __VA_ARGS__); \
}
#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!")
#define UNREACHABLE_MSG(...) ASSERT_MSG(false, __VA_ARGS__)

View File

@@ -49,6 +49,22 @@ struct ThreadQueueList {
return T();
}
template <typename UnaryPredicate>
T get_first_filter(UnaryPredicate filter) const {
const Queue* cur = first;
while (cur != nullptr) {
if (!cur->data.empty()) {
for (const auto& item : cur->data) {
if (filter(item))
return item;
}
}
cur = cur->next_nonempty;
}
return T();
}
T pop_first() {
Queue* cur = first;
while (cur != nullptr) {

View File

@@ -9,6 +9,7 @@
#include "common/logging/log.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/core_cpu.h"
#include "core/core_timing.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
@@ -169,6 +170,16 @@ void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
ready_queue.remove(priority, thread);
}
void Scheduler::MoveThreadToBackOfPriorityQueue(Thread* thread, u32 priority) {
std::lock_guard<std::mutex> lock(scheduler_mutex);
// Thread is not in queue
ASSERT(ready_queue.contains(thread) != -1);
ready_queue.remove(priority, thread);
ready_queue.push_back(priority, thread);
}
void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
std::lock_guard<std::mutex> lock(scheduler_mutex);
@@ -179,4 +190,64 @@ void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
ready_queue.prepare(priority);
}
Thread* Scheduler::GetNextSuggestedThread(u32 core) const {
std::lock_guard<std::mutex> lock(scheduler_mutex);
const u32 mask = 1U << core;
return ready_queue.get_first_filter(
[mask](Thread const* thread) { return (thread->GetAffinityMask() & mask) != 0; });
}
void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
ASSERT(thread != nullptr);
// Avoid yielding if the thread isn't even running.
ASSERT(thread->GetStatus() == ThreadStatus::Running);
// Sanity check that the priority is valid
ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
// Yield this thread
MoveThreadToBackOfPriorityQueue(thread, thread->GetPriority());
Reschedule();
}
void Scheduler::YieldWithLoadBalancing(Thread* thread) {
ASSERT(thread != nullptr);
const auto priority = thread->GetPriority();
const auto core = static_cast<u32>(thread->GetProcessorID());
// Avoid yielding if the thread isn't even running.
ASSERT(thread->GetStatus() == ThreadStatus::Running);
// Sanity check that the priority is valid
ASSERT(priority < THREADPRIO_COUNT);
// Reschedule thread to end of queue.
MoveThreadToBackOfPriorityQueue(thread, priority);
Thread* suggested_thread = nullptr;
// Search through all of the cpu cores (except this one) for a suggested thread.
// Take the first non-nullptr one
for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
if (cur_core == core)
continue;
const auto res =
Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core);
if (res != nullptr) {
suggested_thread = res;
break;
}
}
// If a suggested thread was found, queue that for this core
if (suggested_thread != nullptr)
suggested_thread->ChangeCore(core, suggested_thread->GetAffinityMask());
}
void Scheduler::YieldAndWaitForLoadBalancing(Thread* thread) {
UNIMPLEMENTED_MSG("Wait for load balancing thread yield type is not implemented!");
}
} // namespace Kernel

View File

@@ -48,9 +48,81 @@ public:
/// Unschedules a thread that was already scheduled
void UnscheduleThread(Thread* thread, u32 priority);
/// Moves a thread to the back of the current priority queue
void MoveThreadToBackOfPriorityQueue(Thread* thread, u32 priority);
/// Sets the priority of a thread in the scheduler
void SetThreadPriority(Thread* thread, u32 priority);
/// Gets the next suggested thread for load balancing
Thread* GetNextSuggestedThread(u32 core) const;
/**
* YieldWithoutLoadBalancing -- analogous to normal yield on a system
* Moves the thread to the end of the ready queue for its priority, and then reschedules the
* system to the new head of the queue.
*
* Example (Single Core -- but can be extrapolated to multi):
* ready_queue[prio=0]: ThreadA, ThreadB, ThreadC (->exec order->)
* Currently Running: ThreadR
*
* ThreadR calls YieldWithoutLoadBalancing
*
* ThreadR is moved to the end of ready_queue[prio=0]:
* ready_queue[prio=0]: ThreadA, ThreadB, ThreadC, ThreadR (->exec order->)
* Currently Running: Nothing
*
* System is rescheduled (ThreadA is popped off of queue):
* ready_queue[prio=0]: ThreadB, ThreadC, ThreadR (->exec order->)
* Currently Running: ThreadA
*
* If the queue is empty at time of call, no yielding occurs. This does not cross between cores
* or priorities at all.
*/
void YieldWithoutLoadBalancing(Thread* thread);
/**
* YieldWithLoadBalancing -- yield but with better selection of the new running thread
* Moves the current thread to the end of the ready queue for its priority, then selects a
* 'suggested thread' (a thread on a different core that could run on this core) from the
* scheduler, changes its core, and reschedules the current core to that thread.
*
* Example (Dual Core -- can be extrapolated to Quad Core, this is just normal yield if it were
* single core):
* ready_queue[core=0][prio=0]: ThreadA, ThreadB (affinities not pictured as irrelevant
* ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only]
* Currently Running: ThreadQ on Core 0 || ThreadP on Core 1
*
* ThreadQ calls YieldWithLoadBalancing
*
* ThreadQ is moved to the end of ready_queue[core=0][prio=0]:
* ready_queue[core=0][prio=0]: ThreadA, ThreadB
* ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only]
* Currently Running: ThreadQ on Core 0 || ThreadP on Core 1
*
* A list of suggested threads for each core is compiled
* Suggested Threads: {ThreadC on Core 1}
* If this were quad core (as the switch is), there could be between 0 and 3 threads in this
* list. If there are more than one, the thread is selected by highest prio.
*
* ThreadC is core changed to Core 0:
* ready_queue[core=0][prio=0]: ThreadC, ThreadA, ThreadB, ThreadQ
* ready_queue[core=1][prio=0]: ThreadD
* Currently Running: None on Core 0 || ThreadP on Core 1
*
* System is rescheduled (ThreadC is popped off of queue):
* ready_queue[core=0][prio=0]: ThreadA, ThreadB, ThreadQ
* ready_queue[core=1][prio=0]: ThreadD
* Currently Running: ThreadC on Core 0 || ThreadP on Core 1
*
* If no suggested threads can be found this will behave just as normal yield. If there are
* multiple candidates for the suggested thread on a core, the highest prio is taken.
*/
void YieldWithLoadBalancing(Thread* thread);
/// Currently unknown -- asserts as unimplemented on call
void YieldAndWaitForLoadBalancing(Thread* thread);
/// Returns a list of all threads managed by the scheduler
const std::vector<SharedPtr<Thread>>& GetThreadList() const {
return thread_list;

View File

@@ -736,6 +736,13 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {
const auto* const current_process = Core::CurrentProcess();
// Note: The kernel uses the current process's resource limit instead of
// the one from the thread owner's resource limit.
const ResourceLimit& resource_limit = current_process->GetResourceLimit();
if (resource_limit.GetMaxResourceValue(ResourceType::Priority) > priority) {
return ERR_INVALID_THREAD_PRIORITY;
}
SharedPtr<Thread> thread = current_process->GetHandleTable().Get<Thread>(handle);
if (!thread) {
return ERR_INVALID_HANDLE;
@@ -789,7 +796,7 @@ static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 s
return ERR_INVALID_MEMORY_RANGE;
}
return shared_memory->Map(*current_process, addr, permissions_type, MemoryPermission::DontCare);
return shared_memory->Map(current_process, addr, permissions_type, MemoryPermission::DontCare);
}
static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size) {
@@ -819,7 +826,7 @@ static ResultCode UnmapSharedMemory(Handle shared_memory_handle, VAddr addr, u64
return ERR_INVALID_MEMORY_RANGE;
}
return shared_memory->Unmap(*current_process, addr);
return shared_memory->Unmap(current_process, addr);
}
/// Query process memory
@@ -878,6 +885,10 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
}
auto* const current_process = Core::CurrentProcess();
const ResourceLimit& resource_limit = current_process->GetResourceLimit();
if (resource_limit.GetMaxResourceValue(ResourceType::Priority) > priority) {
return ERR_INVALID_THREAD_PRIORITY;
}
if (processor_id == THREADPROCESSORID_DEFAULT) {
// Set the target CPU to the one specified in the process' exheader.
@@ -951,16 +962,46 @@ static void SleepThread(s64 nanoseconds) {
// Don't attempt to yield execution if there are no available threads to run,
// this way we avoid a useless reschedule to the idle thread.
if (nanoseconds == 0 && !Core::System::GetInstance().CurrentScheduler().HaveReadyThreads())
if (!Core::System::GetInstance().CurrentScheduler().HaveReadyThreads())
return;
enum class SleepType : s64 {
YieldWithoutLoadBalancing = 0,
YieldWithLoadBalancing = 1,
YieldAndWaitForLoadBalancing = 2,
};
if (nanoseconds <= 0) {
auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
switch (static_cast<SleepType>(nanoseconds)) {
case SleepType::YieldWithoutLoadBalancing:
scheduler.YieldWithoutLoadBalancing(GetCurrentThread());
break;
case SleepType::YieldWithLoadBalancing:
scheduler.YieldWithLoadBalancing(GetCurrentThread());
break;
case SleepType::YieldAndWaitForLoadBalancing:
scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread());
break;
default:
UNREACHABLE_MSG(
"Unimplemented sleep yield type '{:016X}'! Falling back to forced reschedule...",
nanoseconds);
}
nanoseconds = 0;
}
// Sleep current thread and check for next thread to schedule
WaitCurrentThread_Sleep();
// Create an event to wake the thread up after the specified nanosecond delay has passed
GetCurrentThread()->WakeAfterDelay(nanoseconds);
Core::System::GetInstance().PrepareReschedule();
Core::System::GetInstance().CpuCore(0).PrepareReschedule();
Core::System::GetInstance().CpuCore(1).PrepareReschedule();
Core::System::GetInstance().CpuCore(2).PrepareReschedule();
Core::System::GetInstance().CpuCore(3).PrepareReschedule();
}
/// Wait process wide key atomic
@@ -1183,39 +1224,9 @@ static ResultCode ResetSignal(Handle handle) {
/// Creates a TransferMemory object
static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32 permissions) {
LOG_DEBUG(Kernel_SVC, "called addr=0x{:X}, size=0x{:X}, perms=0x{:08X}", addr, size,
permissions);
if (!Common::Is4KBAligned(addr)) {
LOG_ERROR(Kernel_SVC, "Address ({:016X}) is not page aligned!", addr);
return ERR_INVALID_ADDRESS;
}
if (!Common::Is4KBAligned(size) || size == 0) {
LOG_ERROR(Kernel_SVC, "Size ({:016X}) is not page aligned or equal to zero!", size);
return ERR_INVALID_ADDRESS;
}
if (!IsValidAddressRange(addr, size)) {
LOG_ERROR(Kernel_SVC, "Address and size cause overflow! (address={:016X}, size={:016X})",
addr, size);
return ERR_INVALID_ADDRESS_STATE;
}
const auto perms = static_cast<MemoryPermission>(permissions);
if (perms != MemoryPermission::None && perms != MemoryPermission::Read &&
perms != MemoryPermission::ReadWrite) {
LOG_ERROR(Kernel_SVC, "Invalid memory permissions for transfer memory! (perms={:08X})",
permissions);
return ERR_INVALID_MEMORY_PERMISSIONS;
}
auto& kernel = Core::System::GetInstance().Kernel();
auto& handle_table = Core::CurrentProcess()->GetHandleTable();
const auto shared_mem_handle = SharedMemory::Create(
kernel, handle_table.Get<Process>(CurrentProcess), size, perms, perms, addr);
CASCADE_RESULT(*handle, handle_table.Create(shared_mem_handle));
LOG_WARNING(Kernel_SVC, "(STUBBED) called addr=0x{:X}, size=0x{:X}, perms=0x{:08X}", addr, size,
permissions);
*handle = 0;
return RESULT_SUCCESS;
}

View File

@@ -26,6 +26,7 @@ enum ThreadPriority : u32 {
THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps
THREADPRIO_DEFAULT = 44, ///< Default thread priority for userland apps
THREADPRIO_LOWEST = 63, ///< Lowest thread priority
THREADPRIO_COUNT = 64, ///< Total number of possible thread priorities.
};
enum ThreadProcessorId : s32 {

View File

@@ -28,6 +28,8 @@ add_library(video_core STATIC
renderer_base.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_global_cache.cpp
renderer_opengl/gl_global_cache.h
renderer_opengl/gl_primitive_assembler.cpp
renderer_opengl/gl_primitive_assembler.h
renderer_opengl/gl_rasterizer.cpp

View File

@@ -68,13 +68,13 @@ void Fermi2D::HandleSurfaceCopy() {
Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
dst_buffer, true, regs.src.BlockHeight(),
regs.src.BlockDepth());
regs.src.BlockDepth(), 0);
} else {
// If the input is linear and the output is tiled, swizzle the input and copy it over.
Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer,
src_buffer, false, regs.dst.BlockHeight(),
regs.dst.BlockDepth());
regs.dst.BlockDepth(), 0);
}
}
}

View File

@@ -319,6 +319,11 @@ void Maxwell3D::DrawArrays() {
}
}
bool operator<(const Maxwell3D::GlobalMemoryDescriptor& lhs,
const Maxwell3D::GlobalMemoryDescriptor& rhs) {
return std::tie(lhs.cbuf_index, lhs.cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
}
void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];

View File

@@ -5,6 +5,7 @@
#pragma once
#include <array>
#include <set>
#include <unordered_map>
#include <vector>
#include "common/assert.h"
@@ -31,6 +32,12 @@ public:
explicit Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
~Maxwell3D() = default;
/// Structure representing a global memory region
struct GlobalMemoryDescriptor {
u64 cbuf_index;
u64 cbuf_offset;
};
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
struct Regs {
@@ -879,7 +886,15 @@ public:
Cull cull;
INSERT_PADDING_WORDS(0x28);
INSERT_PADDING_WORDS(0x6);
union {
BitField<0, 1, u32> depth_range_0_1;
BitField<3, 1, u32> depth_clamp_near;
BitField<4, 1, u32> depth_clamp_far;
} view_volume_clip_control;
INSERT_PADDING_WORDS(0x21);
struct {
u32 enable;
@@ -1037,6 +1052,8 @@ public:
std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages;
u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
std::set<GlobalMemoryDescriptor> global_memory_uniforms;
};
State state{};
@@ -1069,6 +1086,9 @@ public:
return macro_memory;
}
std::string CreateGlobalMemoryRegion(std::tuple<u64, u64, u64> iadd_data);
std::set<std::pair<u64, u64>> ListGlobalMemoryRegions() const;
private:
void InitializeRegisterDefaults();
@@ -1123,6 +1143,9 @@ private:
void DrawArrays();
};
bool operator<(const Maxwell3D::GlobalMemoryDescriptor& lhs,
const Maxwell3D::GlobalMemoryDescriptor& rhs);
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \
"Field " #field_name " has invalid position")
@@ -1188,6 +1211,7 @@ ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);
ASSERT_REG_POSITION(color_mask, 0x680);

View File

@@ -1,4 +1,4 @@
// Copyright 2018 yuzu Emulator Project
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@@ -206,6 +206,8 @@ enum class UniformType : u64 {
SignedShort = 3,
Single = 4,
Double = 5,
Quad = 6,
UnsignedQuad = 7,
};
enum class StoreType : u64 {
@@ -772,6 +774,12 @@ union Instruction {
BitField<44, 2, u64> unknown;
} st_l;
union {
BitField<48, 3, UniformType> type;
BitField<46, 2, u64> cache_mode;
BitField<20, 24, s64> offset_immediate;
} ld_g;
union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;

View File

@@ -0,0 +1,97 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/utils.h"
namespace OpenGL {
CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} {
buffer.Create();
LabelGLObject(GL_BUFFER, buffer.handle, addr);
}
/// Helper function to get the maximum size we can use for an OpenGL uniform block
static u32 GetMaxUniformBlockSize() {
GLint max_size{};
glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &max_size);
return static_cast<u32>(max_size);
}
void CachedGlobalRegion::Reload(u32 size_) {
static const u32 max_size{GetMaxUniformBlockSize()};
size = size_;
if (size > max_size) {
size = max_size;
LOG_CRITICAL(HW_GPU, "Global region size {} exceeded max UBO size of {}!", size_, max_size);
}
glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle);
glBufferData(GL_UNIFORM_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
}
GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
auto search{reserve.find(addr)};
if (search == reserve.end()) {
return {};
}
return search->second;
}
GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
if (!region) {
// No reserved surface available, create a new one and reserve it
region = std::make_shared<CachedGlobalRegion>(addr, size);
ReserveGlobalRegion(region);
}
region->Reload(size);
return region;
}
void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) {
reserve[region->GetAddr()] = region;
}
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
: RasterizerCache{rasterizer} {}
GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
const Tegra::Engines::Maxwell3D::GlobalMemoryDescriptor& global_region,
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
auto& gpu{Core::System::GetInstance().GPU()};
const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
const auto cbuf_addr{gpu.MemoryManager().GpuToCpuAddress(
cbufs.const_buffers[global_region.cbuf_index].address + global_region.cbuf_offset)};
ASSERT(cbuf_addr);
const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
const auto size = Memory::Read32(*cbuf_addr + 8);
const auto actual_addr{gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu)};
ASSERT(actual_addr);
// Look up global region in the cache based on address
GlobalRegion region{TryGet(*actual_addr)};
if (!region) {
// No global region found - create a new one
region = GetUncachedGlobalRegion(*actual_addr, size);
Register(region);
}
return region;
}
} // namespace OpenGL

View File

@@ -0,0 +1,89 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <fmt/format.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class RasterizerOpenGL;
class CachedGlobalRegion;
using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
/// Helper class for caching global region uniform locations
class CachedGlobalRegionUniform {
public:
explicit CachedGlobalRegionUniform(std::size_t index) : index{index} {}
std::string GetName() const {
return fmt::format("global_memory_region_declblock_{}", index);
}
u32 GetHash() const {
// This needs to be unique from ConstBufferEntry::GetHash and SamplerEntry::GetHash
return (static_cast<u32>(index) << 16) | 0xFFFF;
}
private:
std::size_t index{};
};
class CachedGlobalRegion final : public RasterizerCacheObject {
public:
CachedGlobalRegion(VAddr addr, u32 size);
/// Gets the address of the shader in guest memory, required for cache management
VAddr GetAddr() const {
return addr;
}
/// Gets the size of the shader in guest memory, required for cache management
std::size_t GetSizeInBytes() const {
return size;
}
/// Gets the GL program handle for the buffer
GLuint GetBufferHandle() const {
return buffer.handle;
}
/// Reloads the global region from guest memory
void Reload(u32 size_);
// We do not have to flush this cache as things in it are never modified by us.
void Flush() override {}
private:
VAddr addr;
u32 size;
OGLBuffer buffer;
};
class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
public:
explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
/// Gets the current specified shader stage program
GlobalRegion GetGlobalRegion(
const Tegra::Engines::Maxwell3D::GlobalMemoryDescriptor& descriptor,
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
private:
GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
void ReserveGlobalRegion(const GlobalRegion& region);
std::unordered_map<VAddr, GlobalRegion> reserve;
};
} // namespace OpenGL

View File

@@ -81,7 +81,7 @@ struct DrawParameters {
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
: res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
buffer_cache(*this, STREAM_BUFFER_SIZE) {
buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -113,10 +113,43 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
CheckExtensions();
}
RasterizerOpenGL::~RasterizerOpenGL() {}
void RasterizerOpenGL::CheckExtensions() {
if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
LOG_WARNING(
Render_OpenGL,
"Anisotropic filter is not supported! This can cause graphical issues in some games.");
}
if (!GLAD_GL_ARB_viewport_array) {
LOG_WARNING(Render_OpenGL, "Viewport arrays are not supported! This can potentially cause "
"issues in games that use geometry shaders.");
}
if (!GLAD_GL_ARB_color_buffer_float) {
LOG_WARNING(
Render_OpenGL,
"Color clamp control is not supported! This can cause graphical issues in some games.");
}
if (!GLAD_GL_ARB_buffer_storage) {
LOG_WARNING(
Render_OpenGL,
"Buffer storage control is not supported! This can cause performance degradation.");
}
if (!GLAD_GL_AMD_depth_clamp_separate) {
if (!GLAD_GL_ARB_depth_clamp) {
LOG_WARNING(
Render_OpenGL,
"Depth Clamp is not supported! This can cause graphical issues in some games.");
} else {
LOG_WARNING(Render_OpenGL, "Separate Depth Clamp is not supported! This can cause "
"graphical issues in some games.");
}
}
}
void RasterizerOpenGL::SetupVertexFormat() {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
@@ -267,7 +300,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_buffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_texture_bindpoint = 0;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -321,9 +354,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
// Configure the const buffers for this shader stage.
current_constbuffer_bindpoint =
current_buffer_bindpoint =
SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
current_constbuffer_bindpoint);
current_buffer_bindpoint);
// Configure global memory regions for this shader stage.
current_buffer_bindpoint =
SetupGlobalRegions(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
current_buffer_bindpoint);
// Configure the textures for this shader stage.
current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
@@ -695,6 +733,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
global_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
}
@@ -919,6 +958,29 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
return current_bindpoint + static_cast<u32>(entries.size());
}
u32 RasterizerOpenGL::SetupGlobalRegions(Maxwell::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint) {
std::size_t global_region_index{};
const auto& maxwell3d{Core::System::GetInstance().GPU().Maxwell3D()};
for (const auto& global_region : maxwell3d.state.global_memory_uniforms) {
const auto& region{
global_cache.GetGlobalRegion(global_region, static_cast<Maxwell::ShaderStage>(stage))};
const GLenum b_index{
shader->GetProgramResourceIndex(CachedGlobalRegionUniform{global_region_index})};
if (b_index != GL_INVALID_INDEX) {
glBindBufferBase(GL_UNIFORM_BUFFER, current_bindpoint, region->GetBufferHandle());
glUniformBlockBinding(shader->GetProgramHandle(primitive_mode), b_index,
current_bindpoint);
++current_bindpoint;
}
++global_region_index;
}
return current_bindpoint;
}
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_unit) {
MICROPROFILE_SCOPE(OpenGL_Texture);
@@ -979,6 +1041,8 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
viewport.depth_range_far = regs.viewports[i].depth_range_far;
viewport.depth_range_near = regs.viewports[i].depth_range_near;
}
state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
}
void RasterizerOpenGL::SyncClipEnabled() {

View File

@@ -23,6 +23,7 @@
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -118,7 +119,7 @@ private:
bool using_depth_fb = true, bool preserve_contents = true,
std::optional<std::size_t> single_color_target = {});
/*
/**
* Configures the current constbuffers to use for the draw command.
* @param stage The shader stage to configure buffers for.
* @param shader The shader object that contains the specified stage.
@@ -128,7 +129,17 @@ private:
u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint);
/*
/**
* Configures the current global memory regions to use for the draw command.
* @param stage The shader stage to configure buffers for.
* @param shader The shader object that contains the specified stage.
* @param current_bindpoint The offset at which to start counting new buffer bindpoints.
* @returns The next available bindpoint for use in the next shader stage.
*/
u32 SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_bindpoint);
/**
* Configures the current textures to use for the draw command.
* @param stage The shader stage to configure textures for.
* @param shader The shader object that contains the specified stage.
@@ -186,6 +197,10 @@ private:
/// Check asserts for alpha testing.
void CheckAlphaTests();
/// Check for extensions that are not strictly required,
/// but are needed for correct emulation
void CheckExtensions();
bool has_ARB_direct_state_access = false;
bool has_ARB_multi_bind = false;
@@ -193,6 +208,7 @@ private:
RasterizerCacheOpenGL res_cache;
ShaderCacheOpenGL shader_cache;
GlobalRegionCacheOpenGL global_cache;
Core::Frontend::EmuWindow& emu_window;

View File

@@ -95,6 +95,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
params.srgb_conversion);
@@ -160,6 +161,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.block_width = 1 << config.memory_layout.block_width;
params.block_height = 1 << config.memory_layout.block_height;
params.block_depth = 1 << config.memory_layout.block_depth;
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
@@ -195,6 +197,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.block_width = 1 << std::min(block_width, 5U);
params.block_height = 1 << std::min(block_height, 5U);
params.block_depth = 1 << std::min(block_depth, 5U);
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromDepthFormat(format);
params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
@@ -221,6 +224,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0,
params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0,
params.tile_width_spacing = 1;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
@@ -371,8 +375,8 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
}
template <bool morton_to_gl, PixelFormat format>
void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer,
std::size_t gl_buffer_size, VAddr addr) {
void MortonCopy(u32 stride, u32 width_spacing, u32 block_height, u32 height, u32 block_depth,
u32 depth, u8* gl_buffer, std::size_t gl_buffer_size, VAddr addr) {
constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@@ -382,17 +386,19 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 d
if (morton_to_gl) {
Tegra::Texture::UnswizzleTexture(gl_buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
stride, height, depth, block_height, block_depth);
stride, height, depth, block_height, block_depth,
width_spacing);
} else {
Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
(height + tile_size_y - 1) / tile_size_y, depth,
bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
gl_buffer, false, block_height, block_depth);
Tegra::Texture::CopySwizzledData(
(stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y,
depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), gl_buffer, false,
block_height, block_depth, width_spacing);
}
}
using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
VideoCore::Surface::MaxPixelFormat>;
using GLConversionArray =
std::array<void (*)(u32, u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
VideoCore::Surface::MaxPixelFormat>;
static constexpr GLConversionArray morton_to_gl_fns = {
// clang-format off
@@ -551,16 +557,17 @@ void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params
const u64 gl_size = params.LayerSizeGL(mip_level);
for (u32 i = 0; i < params.depth; i++) {
functions[static_cast<std::size_t>(params.pixel_format)](
params.MipWidth(mip_level), params.MipBlockHeight(mip_level),
params.MipHeight(mip_level), params.MipBlockDepth(mip_level), 1,
gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
params.MipWidth(mip_level), params.tile_width_spacing,
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
params.MipBlockDepth(mip_level), 1, gl_buffer.data() + offset_gl, gl_size,
params.addr + offset);
offset += layer_size;
offset_gl += gl_size;
}
} else {
const u64 offset = params.GetMipmapLevelOffset(mip_level);
functions[static_cast<std::size_t>(params.pixel_format)](
params.MipWidth(mip_level), params.MipBlockHeight(mip_level),
params.MipWidth(mip_level), params.tile_width_spacing, params.MipBlockHeight(mip_level),
params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(),
gl_buffer.size(), params.addr + offset);
}

View File

@@ -208,6 +208,7 @@ struct SurfaceParams {
u32 block_width;
u32 block_height;
u32 block_depth;
u32 tile_width_spacing;
PixelFormat pixel_format;
ComponentType component_type;
SurfaceType type;

View File

@@ -98,18 +98,6 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
}
}
GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
const auto search{resource_cache.find(buffer.GetHash())};
if (search == resource_cache.end()) {
const GLuint index{
glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
resource_cache[buffer.GetHash()] = index;
return index;
}
return search->second;
}
GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
const auto search{uniform_cache.find(sampler.GetHash())};
if (search == uniform_cache.end()) {

View File

@@ -71,7 +71,18 @@ public:
}
/// Gets the GL program resource location for the specified resource, caching as needed
GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer);
template <typename T>
GLuint GetProgramResourceIndex(const T& buffer) {
const auto& search{resource_cache.find(buffer.GetHash())};
if (search == resource_cache.end()) {
const GLuint index{glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK,
buffer.GetName().c_str())};
resource_cache[buffer.GetHash()] = index;
return index;
}
return search->second;
}
/// Gets the GL uniform location for the specified resource, caching as needed
GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);

View File

@@ -13,6 +13,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -570,6 +571,7 @@ public:
GenerateInputAttrs();
GenerateOutputAttrs();
GenerateConstBuffers();
GenerateGlobalRegions();
GenerateSamplers();
GenerateGeometry();
}
@@ -691,6 +693,21 @@ private:
declarations.AddNewLine();
}
/// Generates declarations for global memory regions.
void GenerateGlobalRegions() {
const auto& regions{
Core::System::GetInstance().GPU().Maxwell3D().state.global_memory_uniforms};
for (std::size_t i = 0; i < regions.size(); ++i) {
declarations.AddLine("layout(std140) uniform " +
fmt::format("global_memory_region_declblock_{}", i));
declarations.AddLine('{');
declarations.AddLine(" vec4 global_memory_region_" + std::to_string(i) + "[0x400];");
declarations.AddLine("};");
declarations.AddNewLine();
}
declarations.AddNewLine();
}
/// Generates declarations for samplers.
void GenerateSamplers() {
const auto& samplers = GetSamplers();
@@ -1778,6 +1795,11 @@ private:
} else {
op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
GLSLRegister::Type::Integer);
if (opcode->get().GetId() == OpCode::Id::IADD_C) {
s_last_iadd = last_iadd;
last_iadd = IADDReference{instr.gpr8.Value(), instr.cbuf34.index,
instr.cbuf34.offset};
}
}
}
@@ -3008,6 +3030,72 @@ private:
shader.AddLine('}');
break;
}
case OpCode::Id::LDG: {
// Determine number of GPRs to fill with data
u64 count = 1;
switch (instr.ld_g.type) {
case Tegra::Shader::UniformType::Single:
count = 1;
break;
case Tegra::Shader::UniformType::Double:
count = 2;
break;
case Tegra::Shader::UniformType::Quad:
case Tegra::Shader::UniformType::UnsignedQuad:
count = 4;
break;
default:
UNREACHABLE_MSG("Unimplemented LDG size!");
}
auto [gpr_index, index, offset] = last_iadd;
// The last IADD might be the upper u32 of address, so instead take the one before
// that.
if (gpr_index == Register::ZeroIndex) {
gpr_index = s_last_iadd.out;
index = s_last_iadd.cbuf_index;
offset = s_last_iadd.cbuf_offset;
}
const auto gpr = regs.GetRegisterAsInteger(gpr_index);
const auto constbuffer =
regs.GetUniform(index, offset, GLSLRegister::Type::UnsignedInteger);
Core::System::GetInstance().GPU().Maxwell3D().state.global_memory_uniforms.insert(
{index, offset * 4});
const auto memory = fmt::format("global_memory_region_{}",
Core::System::GetInstance()
.GPU()
.Maxwell3D()
.state.global_memory_uniforms.size() -
1);
const auto immediate = std::to_string(instr.ld_g.offset_immediate.Value());
const auto o_register = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
const auto address = "( " + immediate + " + " + o_register + " )";
const auto base_sub = address + " - " + constbuffer;
// New scope to prevent potential conflicts
shader.AddLine('{');
++shader.scope;
shader.AddLine("uint final_offset = " + base_sub + ";");
for (std::size_t out = 0; out < count; ++out) {
const u64 reg_id = instr.gpr0.Value() + out;
const auto this_memory =
fmt::format("{}[(final_offset + {}) / 16][((final_offset + {}) / 4) % 4]",
memory, out * 4, out * 4);
regs.SetRegisterToFloat(reg_id, 0, this_memory, 1, 1);
}
--shader.scope;
shader.AddLine('}');
break;
}
default: {
UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
}
@@ -3777,9 +3865,18 @@ private:
ShaderWriter declarations;
GLSLRegisterManager regs{shader, declarations, stage, suffix, header};
struct IADDReference {
Register out;
u64 cbuf_index;
u64 cbuf_offset;
};
IADDReference last_iadd{};
IADDReference s_last_iadd{};
// Declarations
std::set<std::string> declr_predicates;
}; // namespace OpenGL::GLShader::Decompiler
};
std::string GetCommonDeclarations() {
return fmt::format("#define MAX_CONSTBUFFER_ELEMENTS {}\n",

View File

@@ -57,7 +57,8 @@ public:
}
u32 GetHash() const {
return (static_cast<u32>(stage) << 16) | index;
// This needs to be unique from CachedGlobalRegionUniform::GetHash
return (static_cast<u32>(stage) << 12) | index;
}
private:
@@ -138,7 +139,8 @@ public:
}
u32 GetHash() const {
return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index);
// This needs to be unique from CachedGlobalRegionUniform::GetHash
return (static_cast<u32>(stage) << 12) | static_cast<u32>(sampler_index);
}
static std::string GetArrayName(Maxwell::ShaderStage stage) {

View File

@@ -92,6 +92,8 @@ OpenGLState::OpenGLState() {
point.size = 1;
fragment_color_clamp.enabled = false;
depth_clamp.far_plane = false;
depth_clamp.near_plane = false;
}
void OpenGLState::ApplyDefaultState() {
@@ -469,6 +471,32 @@ void OpenGLState::ApplyVertexBufferState() const {
}
}
void OpenGLState::ApplyDepthClamp() const {
if (depth_clamp.far_plane != cur_state.depth_clamp.far_plane ||
depth_clamp.near_plane != cur_state.depth_clamp.near_plane) {
if (GLAD_GL_AMD_depth_clamp_separate) {
if (depth_clamp.far_plane) {
glEnable(GL_DEPTH_CLAMP_FAR_AMD);
} else {
glDisable(GL_DEPTH_CLAMP_FAR_AMD);
}
if (depth_clamp.near_plane) {
glEnable(GL_DEPTH_CLAMP_NEAR_AMD);
} else {
glDisable(GL_DEPTH_CLAMP_NEAR_AMD);
}
} else {
if (depth_clamp.far_plane || depth_clamp.near_plane) {
glEnable(GL_DEPTH_CLAMP);
} else {
glDisable(GL_DEPTH_CLAMP);
}
UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
"Unimplemented Depth Clamp Separation!");
}
}
}
void OpenGLState::Apply() const {
ApplyFramebufferState();
ApplyVertexBufferState();
@@ -520,7 +548,7 @@ void OpenGLState::Apply() const {
glDisable(GL_SAMPLE_ALPHA_TO_ONE);
}
}
ApplyDepthClamp();
ApplyColorMask();
ApplyViewport();
ApplyStencilTest();

View File

@@ -48,6 +48,11 @@ public:
bool enabled; // GL_CLAMP_FRAGMENT_COLOR_ARB
} fragment_color_clamp;
struct {
bool far_plane;
bool near_plane;
} depth_clamp; // GL_DEPTH_CLAMP
struct {
bool enabled; // viewports arrays are only supported when geometry shaders are enabled.
} geometry_shaders;
@@ -226,6 +231,7 @@ private:
void ApplyLogicOp() const;
void ApplyTextures() const;
void ApplySamplers() const;
void ApplyDepthClamp() const;
};
} // namespace OpenGL

View File

@@ -127,7 +127,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
template <bool fast>
void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel,
const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth) {
const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth,
const u32 width_spacing) {
auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
const u32 stride_x = width * out_bytes_per_pixel;
const u32 layer_z = height * stride_x;
@@ -137,7 +138,8 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
const u32 block_x_elements = gob_elements_x;
const u32 block_y_elements = gob_elements_y * block_height;
const u32 block_z_elements = gob_elements_z * block_depth;
const u32 blocks_on_x = div_ceil(width, block_x_elements);
const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing);
const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements);
const u32 blocks_on_y = div_ceil(height, block_y_elements);
const u32 blocks_on_z = div_ceil(depth, block_z_elements);
const u32 xy_block_size = gob_size * block_height;
@@ -169,13 +171,15 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data,
bool unswizzle, u32 block_height, u32 block_depth) {
bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) {
SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth);
bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth,
width_spacing);
} else {
SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth);
bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth,
width_spacing);
}
}
@@ -228,19 +232,19 @@ u32 BytesPerPixel(TextureFormat format) {
void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
u32 block_depth) {
u32 block_depth, u32 width_spacing) {
CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
(height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true,
block_height, block_depth);
block_height, block_depth, width_spacing);
}
std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth) {
u32 block_height, u32 block_depth, u32 width_spacing) {
std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
width, height, depth, block_height, block_depth);
width, height, depth, block_height, block_depth, width_spacing);
return unswizzled_data;
}

View File

@@ -22,19 +22,20 @@ inline std::size_t GetGOBSize() {
void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height = TICEntry::DefaultBlockHeight,
u32 block_depth = TICEntry::DefaultBlockHeight);
u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
/**
* Unswizzles a swizzled texture without changing its format.
*/
std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height = TICEntry::DefaultBlockHeight,
u32 block_depth = TICEntry::DefaultBlockHeight);
u32 block_depth = TICEntry::DefaultBlockHeight,
u32 width_spacing = 0);
/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary.
void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
bool unswizzle, u32 block_height, u32 block_depth);
bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
/**
* Decodes an unswizzled texture into a A8R8G8B8 texture.

View File

@@ -166,6 +166,8 @@ struct TICEntry {
BitField<3, 3, u32> block_height;
BitField<6, 3, u32> block_depth;
BitField<10, 3, u32> tile_width_spacing;
// High 16 bits of the pitch value
BitField<0, 16, u32> pitch_high;
BitField<26, 1, u32> use_header_opt_control;