Compare commits

...

33 Commits

Author SHA1 Message Date
Zach Hilman
f61379f8d2 patch_manager: Move non-Program RomFS patch log to Debug
Normal Program-type patches will still be logged to aid in debugging, but for others (mainly Control), it was moved to Debug.
2018-10-12 23:27:19 -04:00
Zach Hilman
90c07e0d33 content_archive: Move get key log to Trace level
Avoids printing live keys in the general log.
2018-10-12 23:25:59 -04:00
bunnei
2946d4bdbe Merge pull request #1467 from ogniK5377/svcbreak-type-fix
Fixed incorrect types for svcBreak
2018-10-12 12:08:08 -04:00
bunnei
0f7ab3e21a Merge pull request #1478 from ogniK5377/remap-invalidhandle-remap
Passing an invalid nmap handle to Remap should throw an error
2018-10-12 12:07:14 -04:00
bunnei
f9d03b1d41 Merge pull request #1482 from lioncash/init
thread: Remove unnecessary memset from ResetThreadContext()
2018-10-12 12:06:51 -04:00
bunnei
dc328440c8 Merge pull request #1479 from ogniK5377/nmap-revamped
Added error codes for nvmap
2018-10-12 12:06:22 -04:00
Lioncash
b492d43e63 thread: Remove unnecessary memset from ResetThreadContext()
Regular value initialization is adequate here for zeroing out data. It
also has the benefit of not invoking undefined behavior if a non-trivial
type is ever added to the struct for whatever reason.
2018-10-12 10:57:31 -04:00
David Marcec
4d2de6564f Returned an error before processing other remaps 2018-10-12 17:10:41 +11:00
David Marcec
c55b5de0fb Made the minimum alignment more clear 2018-10-12 17:06:46 +11:00
bunnei
9bf409f275 Merge pull request #1474 from ogniK5377/hwopus-decodeinterleavedwithperformance
HwOpus, Implemented DecodeInterleavedWithPerformance
2018-10-11 16:52:13 -04:00
bunnei
3fd26b7147 Merge pull request #1472 from lioncash/san
svc: Add missing address range sanitizing checks to MapMemory/UnmapMemory
2018-10-11 16:51:41 -04:00
bunnei
bc293e1751 Merge pull request #1476 from bunnei/fix-unmap-flush
nvhost_as_gpu: Flush/invalidate CPU VAddr on UnmapBuffer.
2018-10-11 16:51:28 -04:00
bunnei
83ac3e6395 Merge pull request #1477 from ReinUsesLisp/vmad
gl_shader_decompiler: Implement VMAD
2018-10-11 16:51:09 -04:00
David Marcec
c7763603ef Added error codes for nvmap 2018-10-11 23:06:34 +11:00
David Marcec
5dd538cace Passing an invalid nmap handle to Remap should throw an error
Added error for invalid nmap handles
2018-10-11 20:32:21 +11:00
ReinUsesLisp
17290a4416 gl_shader_decompiler: Implement VMAD 2018-10-11 04:15:10 -03:00
bunnei
bf795edac4 nvhost_as_gpu: Flush CPU VAddr on UnmapBuffer. 2018-10-11 00:19:36 -04:00
David Marcec
fa10905e1e HwOpus, Implemented DecodeInterleavedWithPerformance
Used by sonic ages
2018-10-11 13:06:56 +11:00
bunnei
6d82c4adf9 Merge pull request #1458 from FernandoS27/fix-render-target-block-settings
Fixed block height settings for RenderTargets and Depth Buffers
2018-10-10 21:24:07 -04:00
Lioncash
72e9cb523e svc: Add missing address range sanitizing checks to MapMemory/UnmapMemory
This adds the missing address range checking that the service functions
do before attempting to map or unmap memory. Given that both service
functions perform the same set of checks in the same order, we can wrap
these into a function and just call it from both functions, which
deduplicates a little bit of code.
2018-10-10 20:30:49 -04:00
bunnei
03ec936ca0 Merge pull request #1460 from FernandoS27/scissor_test
Implemented Scissor Testing
2018-10-10 12:04:10 -04:00
bunnei
ee1b204749 Merge pull request #1425 from ReinUsesLisp/geometry-shaders
gl_shader_decompiler: Implement geometry shaders
2018-10-10 11:51:29 -04:00
bunnei
68b3d8b7a9 Merge pull request #1469 from lioncash/ptr
kernel/thread: Use a regular pointer for the owner/current process
2018-10-10 10:34:20 -04:00
FernandoS27
5f4ee6f0c8 Add memory Layout to Render Targets and Depth Buffers 2018-10-09 22:28:19 -04:00
David Marcec
2db37ddea9 Changed all casts in svc_wrap.h to be static_cast instead 2018-10-10 12:49:08 +11:00
David Marcec
09b6dda8f0 Use a better name than "dont_kill_application"
signal_debugger seems like a more fitting name
2018-10-10 12:27:44 +11:00
David Marcec
a4412c8e22 Fixed incorrect types for svcBreak
svcBreak reason should be a u32, not a u64.
2018-10-10 12:23:50 +11:00
FernandoS27
af653906d0 Fixed block height settings for RenderTargets and Depth Buffers, and added block width and block depth 2018-10-09 21:14:32 -04:00
FernandoS27
be97fc884d Implement Scissor Test 2018-10-08 21:36:23 -04:00
FernandoS27
30ff42b8cc Assert Scissor tests 2018-10-08 20:49:36 -04:00
ReinUsesLisp
7c2d6ef210 gl_shader_decompiler: Move position varying location from 15 to 0 and apply an offset 2018-10-07 17:36:00 -03:00
ReinUsesLisp
ee4d538850 gl_shader_decompiler: Implement geometry shaders 2018-10-07 17:36:00 -03:00
ReinUsesLisp
4d0c682468 video_core: Allow LabelGLObject to use extra info on any object 2018-10-07 17:27:49 -03:00
25 changed files with 1026 additions and 220 deletions

View File

@@ -133,7 +133,7 @@ boost::optional<Core::Crypto::Key128> NCA::GetKeyAreaKey(NCASectionCryptoType ty
static_cast<u8>(type));
u128 out_128{};
memcpy(out_128.data(), out.data(), 16);
LOG_DEBUG(Crypto, "called with crypto_rev={:02X}, kak_index={:02X}, key={:016X}{:016X}",
LOG_TRACE(Crypto, "called with crypto_rev={:02X}, kak_index={:02X}, key={:016X}{:016X}",
master_key_id, header.key_index, out_128[1], out_128[0]);
return out;

View File

@@ -214,8 +214,14 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, ContentRecordType type,
VirtualFile update_raw) const {
LOG_INFO(Loader, "Patching RomFS for title_id={:016X}, type={:02X}", title_id,
static_cast<u8>(type));
// Keep the formatted message alive as an owning string. Calling .c_str() on the
// temporary returned by fmt::format() yields a pointer that dangles as soon as
// the full expression ends, so the log calls below would read freed memory.
const auto log_string = fmt::format("Patching RomFS for title_id={:016X}, type={:02X}",
                                    title_id, static_cast<u8>(type));
// Program-type patches stay at Info level; every other content type is logged
// at Debug level. The message is passed as an argument to "{}" so that the
// already-formatted text is never re-interpreted as a format string.
if (type == ContentRecordType::Program)
    LOG_INFO(Loader, "{}", log_string);
else
    LOG_DEBUG(Loader, "{}", log_string);
if (romfs == nullptr)
return romfs;

View File

@@ -22,6 +22,7 @@ enum {
HandleTableFull = 105,
InvalidMemoryState = 106,
InvalidMemoryPermissions = 108,
InvalidMemoryRange = 110,
InvalidThreadPriority = 112,
InvalidProcessorId = 113,
InvalidHandle = 114,
@@ -56,6 +57,7 @@ constexpr ResultCode ERR_INVALID_ADDRESS(ErrorModule::Kernel, ErrCodes::InvalidA
constexpr ResultCode ERR_INVALID_ADDRESS_STATE(ErrorModule::Kernel, ErrCodes::InvalidMemoryState);
constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS(ErrorModule::Kernel,
ErrCodes::InvalidMemoryPermissions);
constexpr ResultCode ERR_INVALID_MEMORY_RANGE(ErrorModule::Kernel, ErrCodes::InvalidMemoryRange);
constexpr ResultCode ERR_INVALID_HANDLE(ErrorModule::Kernel, ErrCodes::InvalidHandle);
constexpr ResultCode ERR_INVALID_PROCESSOR_ID(ErrorModule::Kernel, ErrCodes::InvalidProcessorId);
constexpr ResultCode ERR_INVALID_SIZE(ErrorModule::Kernel, ErrCodes::InvalidSize);

View File

@@ -39,6 +39,73 @@ namespace {
// Returns true when the lowest 12 bits of the address are all zero,
// i.e. the address is a multiple of 0x1000 (4KB).
constexpr bool Is4KBAligned(VAddr address) {
    constexpr VAddr low_bits_mask = 0xFFF;
    return (address & low_bits_mask) == 0;
}
// Verifies that address + size ends up strictly above address.
// The result is false when the addition wraps around the 64-bit range
// or when the given size is zero.
constexpr bool IsValidAddressRange(VAddr address, u64 size) {
    const auto range_end = address + size;
    return range_end > address;
}
// Checks if a given address range lies within a larger address range.
// Both ranges are compared by their last contained address (end - 1), so a
// range that finishes exactly at address_range_end is still considered inside.
constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
                                    VAddr address_range_end) {
    const VAddr last_address = address + size - 1;
    const auto range_last_address = address_range_end - 1;
    if (!(address_range_begin <= address)) {
        return false;
    }
    return last_address <= range_last_address;
}
bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
vm.GetAddressSpaceEndAddress());
}
bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
vm.GetNewMapRegionEndAddress());
}
// Helper function that performs the common sanity checks for svcMapMemory
// and svcUnmapMemory. This is doable, as both functions perform their sanitizing
// in the same order.
//
// Returns RESULT_SUCCESS when every check passes, otherwise the error of the
// first failing check, evaluated in this order:
// - ERR_INVALID_ADDRESS:       dst_addr or src_addr is not 4KB aligned.
// - ERR_INVALID_SIZE:          size is zero or not 4KB aligned.
// - ERR_INVALID_ADDRESS_STATE: either range wraps around, or the source range
//                              is not inside the address space.
// - ERR_INVALID_MEMORY_RANGE:  the destination range is outside the new map
//                              region, or overlaps the heap or map region.
ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_addr, VAddr src_addr,
                                      u64 size) {
    if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
        return ERR_INVALID_ADDRESS;
    }

    if (size == 0 || !Is4KBAligned(size)) {
        return ERR_INVALID_SIZE;
    }

    if (!IsValidAddressRange(dst_addr, size)) {
        return ERR_INVALID_ADDRESS_STATE;
    }

    if (!IsValidAddressRange(src_addr, size)) {
        return ERR_INVALID_ADDRESS_STATE;
    }

    if (!IsInsideAddressSpace(vm_manager, src_addr, size)) {
        return ERR_INVALID_ADDRESS_STATE;
    }

    if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) {
        return ERR_INVALID_MEMORY_RANGE;
    }

    // One past the last destination byte; used for the half-open overlap tests below.
    const VAddr dst_end_address = dst_addr + size;

    // The destination must not overlap the heap region.
    if (dst_end_address > vm_manager.GetHeapRegionBaseAddress() &&
        dst_addr < vm_manager.GetHeapRegionEndAddress()) {
        return ERR_INVALID_MEMORY_RANGE;
    }

    // The destination must not overlap the map region either. Both bounds have to
    // belong to the same region: comparing against the *new* map region base (as
    // was done before) is always true at this point, because the destination was
    // already verified to lie inside the new map region, which reduced this test
    // to "dst_addr < map region end" and rejected valid mappings whenever the map
    // region is located above the new map region.
    // NOTE(review): assumes VMManager exposes GetMapRegionBaseAddress() alongside
    // GetMapRegionEndAddress(), matching the other region accessors — confirm.
    if (dst_end_address > vm_manager.GetMapRegionBaseAddress() &&
        dst_addr < vm_manager.GetMapRegionEndAddress()) {
        return ERR_INVALID_MEMORY_RANGE;
    }

    return RESULT_SUCCESS;
}
} // Anonymous namespace
/// Set the process heap to a given Size. It can both extend and shrink the heap.
@@ -69,15 +136,15 @@ static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
src_addr, size);
if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
return ERR_INVALID_ADDRESS;
auto* const current_process = Core::CurrentProcess();
const auto& vm_manager = current_process->VMManager();
const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size);
if (result != RESULT_SUCCESS) {
return result;
}
if (size == 0 || !Is4KBAligned(size)) {
return ERR_INVALID_SIZE;
}
return Core::CurrentProcess()->MirrorMemory(dst_addr, src_addr, size);
return current_process->MirrorMemory(dst_addr, src_addr, size);
}
/// Unmaps a region that was previously mapped with svcMapMemory
@@ -85,15 +152,15 @@ static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
src_addr, size);
if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
return ERR_INVALID_ADDRESS;
auto* const current_process = Core::CurrentProcess();
const auto& vm_manager = current_process->VMManager();
const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size);
if (result != RESULT_SUCCESS) {
return result;
}
if (size == 0 || !Is4KBAligned(size)) {
return ERR_INVALID_SIZE;
}
return Core::CurrentProcess()->UnmapMemory(dst_addr, src_addr, size);
return current_process->UnmapMemory(dst_addr, src_addr, size);
}
/// Connect to an OS service given the port name, returns the handle to the port to out
@@ -303,15 +370,15 @@ static ResultCode ArbitrateUnlock(VAddr mutex_addr) {
struct BreakReason {
union {
u64 raw;
BitField<31, 1, u64> dont_kill_application;
u32 raw;
BitField<31, 1, u32> signal_debugger;
};
};
/// Break program execution
static void Break(u64 reason, u64 info1, u64 info2) {
static void Break(u32 reason, u64 info1, u64 info2) {
BreakReason break_reason{reason};
if (break_reason.dont_kill_application) {
if (break_reason.signal_debugger) {
LOG_ERROR(
Debug_Emulated,
"Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}",

View File

@@ -35,18 +35,18 @@ void SvcWrap() {
template <ResultCode func(u32)>
void SvcWrap() {
FuncReturn(func((u32)Param(0)).raw);
FuncReturn(func(static_cast<u32>(Param(0))).raw);
}
template <ResultCode func(u32, u32)>
void SvcWrap() {
FuncReturn(func((u32)Param(0), (u32)Param(1)).raw);
FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1))).raw);
}
template <ResultCode func(u32*, u32)>
void SvcWrap() {
u32 param_1 = 0;
u32 retval = func(&param_1, (u32)Param(1)).raw;
u32 retval = func(&param_1, static_cast<u32>(Param(1))).raw;
Core::CurrentArmInterface().SetReg(1, param_1);
FuncReturn(retval);
}
@@ -61,7 +61,7 @@ void SvcWrap() {
template <ResultCode func(u64, s32)>
void SvcWrap() {
FuncReturn(func(Param(0), (s32)Param(1)).raw);
FuncReturn(func(Param(0), static_cast<s32>(Param(1))).raw);
}
template <ResultCode func(u64, u32)>
@@ -79,19 +79,19 @@ void SvcWrap() {
template <ResultCode func(u32, u64)>
void SvcWrap() {
FuncReturn(func((u32)(Param(0) & 0xFFFFFFFF), Param(1)).raw);
FuncReturn(func(static_cast<u32>(Param(0)), Param(1)).raw);
}
template <ResultCode func(u32, u32, u64)>
void SvcWrap() {
FuncReturn(func((u32)(Param(0) & 0xFFFFFFFF), (u32)(Param(1) & 0xFFFFFFFF), Param(2)).raw);
FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1)), Param(2)).raw);
}
template <ResultCode func(u32, u32*, u64*)>
void SvcWrap() {
u32 param_1 = 0;
u64 param_2 = 0;
ResultCode retval = func((u32)(Param(2) & 0xFFFFFFFF), &param_1, &param_2);
ResultCode retval = func(static_cast<u32>(Param(2)), &param_1, &param_2);
Core::CurrentArmInterface().SetReg(1, param_1);
Core::CurrentArmInterface().SetReg(2, param_2);
FuncReturn(retval.raw);
@@ -100,12 +100,12 @@ void SvcWrap() {
template <ResultCode func(u64, u64, u32, u32)>
void SvcWrap() {
FuncReturn(
func(Param(0), Param(1), (u32)(Param(3) & 0xFFFFFFFF), (u32)(Param(3) & 0xFFFFFFFF)).raw);
func(Param(0), Param(1), static_cast<u32>(Param(3)), static_cast<u32>(Param(3))).raw);
}
template <ResultCode func(u32, u64, u32)>
void SvcWrap() {
FuncReturn(func((u32)Param(0), Param(1), (u32)Param(2)).raw);
FuncReturn(func(static_cast<u32>(Param(0)), Param(1), static_cast<u32>(Param(2))).raw);
}
template <ResultCode func(u64, u64, u64)>
@@ -115,25 +115,28 @@ void SvcWrap() {
template <ResultCode func(u32, u64, u64, u32)>
void SvcWrap() {
FuncReturn(func((u32)Param(0), Param(1), Param(2), (u32)Param(3)).raw);
FuncReturn(
func(static_cast<u32>(Param(0)), Param(1), Param(2), static_cast<u32>(Param(3))).raw);
}
template <ResultCode func(u32, u64, u64)>
void SvcWrap() {
FuncReturn(func((u32)Param(0), Param(1), Param(2)).raw);
FuncReturn(func(static_cast<u32>(Param(0)), Param(1), Param(2)).raw);
}
template <ResultCode func(u32*, u64, u64, s64)>
void SvcWrap() {
u32 param_1 = 0;
ResultCode retval = func(&param_1, Param(1), (u32)(Param(2) & 0xFFFFFFFF), (s64)Param(3));
ResultCode retval =
func(&param_1, Param(1), static_cast<u32>(Param(2)), static_cast<s64>(Param(3)));
Core::CurrentArmInterface().SetReg(1, param_1);
FuncReturn(retval.raw);
}
template <ResultCode func(u64, u64, u32, s64)>
void SvcWrap() {
FuncReturn(func(Param(0), Param(1), (u32)Param(2), (s64)Param(3)).raw);
FuncReturn(
func(Param(0), Param(1), static_cast<u32>(Param(2)), static_cast<s64>(Param(3))).raw);
}
template <ResultCode func(u64*, u64, u64, u64)>
@@ -147,9 +150,9 @@ void SvcWrap() {
template <ResultCode func(u32*, u64, u64, u64, u32, s32)>
void SvcWrap() {
u32 param_1 = 0;
u32 retval =
func(&param_1, Param(1), Param(2), Param(3), (u32)Param(4), (s32)(Param(5) & 0xFFFFFFFF))
.raw;
u32 retval = func(&param_1, Param(1), Param(2), Param(3), static_cast<u32>(Param(4)),
static_cast<s32>(Param(5)))
.raw;
Core::CurrentArmInterface().SetReg(1, param_1);
FuncReturn(retval);
}
@@ -172,7 +175,7 @@ void SvcWrap() {
template <ResultCode func(u32*, u64, u64, u32)>
void SvcWrap() {
u32 param_1 = 0;
u32 retval = func(&param_1, Param(1), Param(2), (u32)(Param(3) & 0xFFFFFFFF)).raw;
u32 retval = func(&param_1, Param(1), Param(2), static_cast<u32>(Param(3))).raw;
Core::CurrentArmInterface().SetReg(1, param_1);
FuncReturn(retval);
}
@@ -181,22 +184,22 @@ template <ResultCode func(Handle*, u64, u32, u32)>
void SvcWrap() {
u32 param_1 = 0;
u32 retval =
func(&param_1, Param(1), (u32)(Param(2) & 0xFFFFFFFF), (u32)(Param(3) & 0xFFFFFFFF)).raw;
func(&param_1, Param(1), static_cast<u32>(Param(2)), static_cast<u32>(Param(3))).raw;
Core::CurrentArmInterface().SetReg(1, param_1);
FuncReturn(retval);
}
template <ResultCode func(u64, u32, s32, s64)>
void SvcWrap() {
FuncReturn(
func(Param(0), (u32)(Param(1) & 0xFFFFFFFF), (s32)(Param(2) & 0xFFFFFFFF), (s64)Param(3))
.raw);
FuncReturn(func(Param(0), static_cast<u32>(Param(1)), static_cast<s32>(Param(2)),
static_cast<s64>(Param(3)))
.raw);
}
template <ResultCode func(u64, u32, s32, s32)>
void SvcWrap() {
FuncReturn(func(Param(0), (u32)(Param(1) & 0xFFFFFFFF), (s32)(Param(2) & 0xFFFFFFFF),
(s32)(Param(3) & 0xFFFFFFFF))
FuncReturn(func(Param(0), static_cast<u32>(Param(1)), static_cast<s32>(Param(2)),
static_cast<s32>(Param(3)))
.raw);
}
@@ -226,7 +229,7 @@ void SvcWrap() {
template <void func(s64)>
void SvcWrap() {
func((s64)Param(0));
func(static_cast<s64>(Param(0)));
}
template <void func(u64, u64 len)>
@@ -239,4 +242,9 @@ void SvcWrap() {
func(Param(0), Param(1), Param(2));
}
// Wrapper for SVC handlers that take (u32, u64, u64) and return nothing.
// The first parameter is narrowed to 32 bits; the remaining two are forwarded as-is.
template <void func(u32, u64, u64)>
void SvcWrap() {
    const auto first_arg = static_cast<u32>(Param(0));
    func(first_arg, Param(1), Param(2));
}
} // namespace Kernel

View File

@@ -183,13 +183,10 @@ void Thread::ResumeFromWait() {
*/
static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAddr stack_top,
VAddr entry_point, u64 arg) {
memset(&context, 0, sizeof(Core::ARM_Interface::ThreadContext));
context = {};
context.cpu_registers[0] = arg;
context.pc = entry_point;
context.sp = stack_top;
context.pstate = 0;
context.fpcr = 0;
}
ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name, VAddr entry_point,

View File

@@ -2,8 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <chrono>
#include <cstring>
#include <memory>
#include <optional>
#include <vector>
#include <opus.h>
@@ -33,7 +35,8 @@ public:
{1, nullptr, "SetContext"},
{2, nullptr, "DecodeInterleavedForMultiStream"},
{3, nullptr, "SetContextForMultiStream"},
{4, nullptr, "Unknown4"},
{4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerformance,
"DecodeInterleavedWithPerformance"},
{5, nullptr, "Unknown5"},
{6, nullptr, "Unknown6"},
{7, nullptr, "Unknown7"},
@@ -59,8 +62,31 @@ private:
ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
}
bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input,
std::vector<opus_int16>& output) {
/// Decodes the request's read buffer into the write buffer and additionally
/// reports the value that Decoder_DecodeInterleaved stores in its performance
/// output. On decode failure a two-word response carrying a placeholder error
/// code is sent; on success a six-word response with the consumed size, the
/// performance value and the sample count is sent, followed by the samples.
void DecodeInterleavedWithPerformance(Kernel::HLERequestContext& ctx) {
    // Size the output so that it fills the whole write buffer with 16-bit samples.
    const auto sample_capacity = ctx.GetWriteBufferSize() / sizeof(opus_int16);
    std::vector<opus_int16> samples(sample_capacity);

    u32 consumed = 0;
    u32 sample_count = 0;
    u64 performance = 0;
    const bool decoded =
        Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples, performance);

    if (!decoded) {
        IPC::ResponseBuilder rb{ctx, 2};
        // TODO(ogniK): Use correct error code
        rb.Push(ResultCode(-1));
        return;
    }

    IPC::ResponseBuilder rb{ctx, 6};
    rb.Push(RESULT_SUCCESS);
    rb.Push<u32>(consumed);
    rb.Push<u64>(performance);
    rb.Push<u32>(sample_count);
    ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
}
bool Decoder_DecodeInterleaved(
u32& consumed, u32& sample_count, const std::vector<u8>& input,
std::vector<opus_int16>& output,
std::optional<std::reference_wrapper<u64>> performance_time = std::nullopt) {
const auto start_time = std::chrono::high_resolution_clock::now();
std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
if (sizeof(OpusHeader) > input.size())
return false;
@@ -80,8 +106,13 @@ private:
(static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)), 0);
if (out_sample_count < 0)
return false;
const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
sample_count = out_sample_count;
consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz);
if (performance_time.has_value()) {
performance_time->get() =
std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
}
return true;
}

View File

@@ -15,6 +15,11 @@
#include "video_core/renderer_base.h"
namespace Service::Nvidia::Devices {
// Error values that the nvhost_as_gpu ioctl handlers return after casting them to u32.
namespace NvErrCodes {
enum {
// Returned by Remap when an entry's nvmap handle does not resolve to an object.
InvalidNmapHandle = -22,
};
}
nvhost_as_gpu::nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {}
nvhost_as_gpu::~nvhost_as_gpu() = default;
@@ -79,14 +84,16 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
std::memcpy(entries.data(), input.data(), input.size());
auto& gpu = Core::System::GetInstance().GPU();
for (const auto& entry : entries) {
LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
entry.offset, entry.nvmap_handle, entry.pages);
Tegra::GPUVAddr offset = static_cast<Tegra::GPUVAddr>(entry.offset) << 0x10;
auto object = nvmap_dev->GetObject(entry.nvmap_handle);
ASSERT(object);
if (!object) {
LOG_CRITICAL(Service_NVDRV, "nvmap {} is an invalid handle!", entry.nvmap_handle);
std::memcpy(output.data(), entries.data(), output.size());
return static_cast<u32>(NvErrCodes::InvalidNmapHandle);
}
ASSERT(object->status == nvmap::Object::Status::Allocated);
@@ -167,10 +174,11 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
auto& system_instance = Core::System::GetInstance();
// Remove this memory region from the rasterizer cache.
system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(params.offset,
itr->second.size);
auto& gpu = system_instance.GPU();
auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
ASSERT(cpu_addr);
system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
buffer_mappings.erase(itr->second.offset);

View File

@@ -11,6 +11,13 @@
namespace Service::Nvidia::Devices {
// Error values that the nvmap ioctl handlers return after casting them to u32.
// NOTE(review): the values look like negated Linux errno codes (EPERM = 1,
// EINVAL = 22) — confirm against the driver documentation.
namespace NvErrCodes {
enum {
OperationNotPermitted = -1,
InvalidValue = -22,
};
}
nvmap::nvmap() = default;
nvmap::~nvmap() = default;
@@ -44,7 +51,11 @@ u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& o
u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) {
IocCreateParams params;
std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "size=0x{:08X}", params.size);
if (!params.size) {
return static_cast<u32>(NvErrCodes::InvalidValue);
}
// Create a new nvmap object and obtain a handle to it.
auto object = std::make_shared<Object>();
object->id = next_id++;
@@ -55,8 +66,6 @@ u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) {
u32 handle = next_handle++;
handles[handle] = std::move(object);
LOG_DEBUG(Service_NVDRV, "size=0x{:08X}", params.size);
params.handle = handle;
std::memcpy(output.data(), &params, sizeof(params));
@@ -66,9 +75,29 @@ u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) {
u32 nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) {
IocAllocParams params;
std::memcpy(&params, input.data(), sizeof(params));
LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.addr);
if (!params.handle) {
return static_cast<u32>(NvErrCodes::InvalidValue);
}
if ((params.align - 1) & params.align) {
return static_cast<u32>(NvErrCodes::InvalidValue);
}
const u32 min_alignment = 0x1000;
if (params.align < min_alignment) {
params.align = min_alignment;
}
auto object = GetObject(params.handle);
ASSERT(object);
if (!object) {
return static_cast<u32>(NvErrCodes::InvalidValue);
}
if (object->status == Object::Status::Allocated) {
return static_cast<u32>(NvErrCodes::OperationNotPermitted);
}
object->flags = params.flags;
object->align = params.align;
@@ -76,8 +105,6 @@ u32 nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) {
object->addr = params.addr;
object->status = Object::Status::Allocated;
LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.addr);
std::memcpy(output.data(), &params, sizeof(params));
return 0;
}
@@ -88,8 +115,14 @@ u32 nvmap::IocGetId(const std::vector<u8>& input, std::vector<u8>& output) {
LOG_WARNING(Service_NVDRV, "called");
if (!params.handle) {
return static_cast<u32>(NvErrCodes::InvalidValue);
}
auto object = GetObject(params.handle);
ASSERT(object);
if (!object) {
return static_cast<u32>(NvErrCodes::OperationNotPermitted);
}
params.id = object->id;
@@ -105,7 +138,14 @@ u32 nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output) {
auto itr = std::find_if(handles.begin(), handles.end(),
[&](const auto& entry) { return entry.second->id == params.id; });
ASSERT(itr != handles.end());
if (itr == handles.end()) {
return static_cast<u32>(NvErrCodes::InvalidValue);
}
auto& object = itr->second;
if (object->status != Object::Status::Allocated) {
return static_cast<u32>(NvErrCodes::InvalidValue);
}
itr->second->refcount++;
@@ -125,8 +165,13 @@ u32 nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output) {
LOG_WARNING(Service_NVDRV, "(STUBBED) called type={}", params.param);
auto object = GetObject(params.handle);
ASSERT(object);
ASSERT(object->status == Object::Status::Allocated);
if (!object) {
return static_cast<u32>(NvErrCodes::InvalidValue);
}
if (object->status != Object::Status::Allocated) {
return static_cast<u32>(NvErrCodes::OperationNotPermitted);
}
switch (static_cast<ParamTypes>(params.param)) {
case ParamTypes::Size:
@@ -163,9 +208,12 @@ u32 nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
LOG_WARNING(Service_NVDRV, "(STUBBED) called");
auto itr = handles.find(params.handle);
ASSERT(itr != handles.end());
ASSERT(itr->second->refcount > 0);
if (itr == handles.end()) {
return static_cast<u32>(NvErrCodes::InvalidValue);
}
if (!itr->second->refcount) {
return static_cast<u32>(NvErrCodes::InvalidValue);
}
itr->second->refcount--;

View File

@@ -36,9 +36,9 @@ public:
RenderTargetFormat format;
BitField<0, 1, u32> linear;
union {
BitField<0, 4, u32> block_depth;
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_width;
BitField<8, 4, u32> block_depth;
};
u32 depth;
u32 layer;
@@ -53,10 +53,20 @@ public:
address_low);
}
u32 BlockWidth() const {
    // block_width holds the log2 of the width, so shift to recover the real value.
    const u32 log2_width = block_width;
    return 1 << log2_width;
}
u32 BlockHeight() const {
    // block_height holds the log2 of the height, so shift to recover the real value.
    const u32 log2_height = block_height;
    return 1 << log2_height;
}
u32 BlockDepth() const {
    // block_depth holds the log2 of the depth, so shift to recover the real value.
    const u32 log2_depth = block_depth;
    return 1 << log2_depth;
}
};
static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");

View File

@@ -347,6 +347,16 @@ public:
DecrWrap = 8,
};
// Memory layout selector in which Linear is encoded as 0 and BlockLinear as 1.
enum class MemoryLayout : u32 {
Linear = 0,
BlockLinear = 1,
};
// Memory layout selector with the inverted encoding: BlockLinear is 0 and Linear is 1.
// This is the type of the single-bit `type` field in the render target and
// depth buffer memory_layout unions.
enum class InvMemoryLayout : u32 {
BlockLinear = 0,
Linear = 1,
};
struct Cull {
enum class FrontFace : u32 {
ClockWise = 0x0900,
@@ -432,7 +442,12 @@ public:
u32 width;
u32 height;
Tegra::RenderTargetFormat format;
u32 block_dimensions;
union {
BitField<0, 3, u32> block_width;
BitField<4, 3, u32> block_height;
BitField<8, 3, u32> block_depth;
BitField<12, 1, InvMemoryLayout> type;
} memory_layout;
u32 array_mode;
u32 layer_stride;
u32 base_layer;
@@ -532,7 +547,21 @@ public:
INSERT_PADDING_WORDS(0x3);
s32 clear_stencil;
INSERT_PADDING_WORDS(0x6C);
INSERT_PADDING_WORDS(0x17);
struct {
u32 enable;
union {
BitField<0, 16, u32> min_x;
BitField<16, 16, u32> max_x;
};
union {
BitField<0, 16, u32> min_y;
BitField<16, 16, u32> max_y;
};
} scissor_test;
INSERT_PADDING_WORDS(0x52);
s32 stencil_back_func_ref;
u32 stencil_back_mask;
@@ -548,7 +577,12 @@ public:
u32 address_high;
u32 address_low;
Tegra::DepthFormat format;
u32 block_dimensions;
union {
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
BitField<20, 1, InvMemoryLayout> type;
} memory_layout;
u32 layer_stride;
GPUVAddr Address() const {
@@ -1002,6 +1036,7 @@ ASSERT_REG_POSITION(vertex_buffer, 0x35D);
ASSERT_REG_POSITION(clear_color[0], 0x360);
ASSERT_REG_POSITION(clear_depth, 0x364);
ASSERT_REG_POSITION(clear_stencil, 0x368);
ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);

View File

@@ -214,6 +214,18 @@ enum class IMinMaxExchange : u64 {
XHi = 3,
};
// Values of the 2-bit type_a / type_b fields of the VMAD instruction encoding.
// NOTE(review): the names suggest the operand is read as the low or high 16 bits
// or as the full 32 bits of the register — confirm against the decompiler.
enum class VmadType : u64 {
Size16_Low = 0,
Size16_High = 1,
Size32 = 2,
Invalid = 3,
};
// Values of the 2-bit shr field of the VMAD instruction encoding.
// NOTE(review): presumably selects a right shift of the result by 7 or 15 bits;
// the meaning of the unlisted values 0 and 3 is not visible here — confirm.
enum class VmadShr : u64 {
Shr7 = 1,
Shr15 = 2,
};
enum class XmadMode : u64 {
None = 0,
CLo = 1,
@@ -314,6 +326,15 @@ enum class TextureMiscMode : u64 {
PTP,
};
// Values of the 2-bit mode field of the ISBERD instruction encoding.
enum class IsberdMode : u64 {
None = 0,
Patch = 1,
Prim = 2,
Attr = 3,
};
// Values of the 2-bit shift field of the ISBERD instruction encoding.
enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };
enum class IpaInterpMode : u64 {
Linear = 0,
Perspective = 1,
@@ -340,6 +361,87 @@ struct IpaMode {
}
};
// Identifiers of the system variables selectable through the 8-bit sys20
// instruction field. The numbering is sparse: several values are unassigned.
// CtxAddr and BarrierAlloc intentionally share the value 0x3e; the trailing
// comments on those entries record which .fmask each name belongs to.
enum class SystemVariable : u64 {
LaneId = 0x00,
VirtCfg = 0x02,
VirtId = 0x03,
Pm0 = 0x04,
Pm1 = 0x05,
Pm2 = 0x06,
Pm3 = 0x07,
Pm4 = 0x08,
Pm5 = 0x09,
Pm6 = 0x0a,
Pm7 = 0x0b,
OrderingTicket = 0x0f,
PrimType = 0x10,
InvocationId = 0x11,
Ydirection = 0x12,
ThreadKill = 0x13,
ShaderType = 0x14,
DirectBeWriteAddressLow = 0x15,
DirectBeWriteAddressHigh = 0x16,
DirectBeWriteEnabled = 0x17,
MachineId0 = 0x18,
MachineId1 = 0x19,
MachineId2 = 0x1a,
MachineId3 = 0x1b,
Affinity = 0x1c,
InvocationInfo = 0x1d,
WscaleFactorXY = 0x1e,
WscaleFactorZ = 0x1f,
Tid = 0x20,
TidX = 0x21,
TidY = 0x22,
TidZ = 0x23,
CtaParam = 0x24,
CtaIdX = 0x25,
CtaIdY = 0x26,
CtaIdZ = 0x27,
NtId = 0x28,
CirQueueIncrMinusOne = 0x29,
Nlatc = 0x2a,
SmSpaVersion = 0x2c,
MultiPassShaderInfo = 0x2d,
LwinHi = 0x2e,
SwinHi = 0x2f,
SwinLo = 0x30,
SwinSz = 0x31,
SmemSz = 0x32,
SmemBanks = 0x33,
LwinLo = 0x34,
LwinSz = 0x35,
LmemLosz = 0x36,
LmemHioff = 0x37,
EqMask = 0x38,
LtMask = 0x39,
LeMask = 0x3a,
GtMask = 0x3b,
GeMask = 0x3c,
RegAlloc = 0x3d,
CtxAddr = 0x3e, // .fmask = F_SM50
BarrierAlloc = 0x3e, // .fmask = F_SM60
GlobalErrorStatus = 0x40,
WarpErrorStatus = 0x42,
WarpErrorStatusClear = 0x43,
PmHi0 = 0x48,
PmHi1 = 0x49,
PmHi2 = 0x4a,
PmHi3 = 0x4b,
PmHi4 = 0x4c,
PmHi5 = 0x4d,
PmHi6 = 0x4e,
PmHi7 = 0x4f,
ClockLo = 0x50,
ClockHi = 0x51,
GlobalTimerLo = 0x52,
GlobalTimerHi = 0x53,
HwTaskId = 0x60,
CircularQueueEntryIndex = 0x61,
CircularQueueEntryAddressLow = 0x62,
CircularQueueEntryAddressHigh = 0x63,
};
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -362,6 +464,7 @@ union Instruction {
BitField<48, 16, u64> opcode;
union {
BitField<20, 16, u64> imm20_16;
BitField<20, 19, u64> imm20_19;
BitField<20, 32, s64> imm20_32;
BitField<45, 1, u64> negate_b;
@@ -403,6 +506,10 @@ union Instruction {
}
} lop3;
// Returns the 16-bit immediate stored in the imm20_16 field (16 bits starting at bit 20).
u16 GetImm20_16() const {
    const auto immediate = static_cast<u16>(imm20_16);
    return immediate;
}
u32 GetImm20_19() const {
u32 imm{static_cast<u32>(imm20_19)};
imm <<= 12;
@@ -914,6 +1021,35 @@ union Instruction {
}
} bra;
union {
BitField<39, 1, u64> emit; // EmitVertex
BitField<40, 1, u64> cut; // EndPrimitive
} out;
union {
BitField<31, 1, u64> skew;
BitField<32, 1, u64> o;
BitField<33, 2, IsberdMode> mode;
BitField<47, 2, IsberdShift> shift;
} isberd;
union {
BitField<48, 1, u64> signed_a;
BitField<38, 1, u64> is_byte_chunk_a;
BitField<36, 2, VmadType> type_a;
BitField<36, 2, u64> byte_height_a;
BitField<49, 1, u64> signed_b;
BitField<50, 1, u64> use_register_b;
BitField<30, 1, u64> is_byte_chunk_b;
BitField<28, 2, VmadType> type_b;
BitField<28, 2, u64> byte_height_b;
BitField<51, 2, VmadShr> shr;
BitField<55, 1, u64> saturate; // Saturates the result (a * b + c)
BitField<47, 1, u64> cc;
} vmad;
union {
BitField<20, 16, u64> imm20_16;
BitField<36, 1, u64> product_shift_left;
@@ -936,6 +1072,10 @@ union Instruction {
BitField<36, 5, u64> index;
} cbuf36;
// Unsure about the size of this one.
// It's always used with a gpr0, so any size should be fine.
BitField<20, 8, SystemVariable> sys20;
BitField<47, 1, u64> generates_cc;
BitField<61, 1, u64> is_b_imm;
BitField<60, 1, u64> is_b_gpr;
@@ -975,6 +1115,9 @@ public:
TMML, // Texture Mip Map Level
EXIT,
IPA,
OUT_R, // Emit vertex/primitive
ISBERD,
VMAD,
FFMA_IMM, // Fused Multiply and Add
FFMA_CR,
FFMA_RC,
@@ -1034,6 +1177,7 @@ public:
MOV_C,
MOV_R,
MOV_IMM,
MOV_SYS,
MOV32_IMM,
SHL_C,
SHL_R,
@@ -1209,6 +1353,9 @@ private:
INST("1101111101011---", Id::TMML, Type::Memory, "TMML"),
INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"),
INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
@@ -1255,6 +1402,7 @@ private:
INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
INST("1111000011001---", Id::MOV_SYS, Type::Trivial, "MOV_SYS"),
INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),

View File

@@ -255,7 +255,7 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
return params;
}
void RasterizerOpenGL::SetupShaders() {
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader);
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
@@ -270,6 +270,11 @@ void RasterizerOpenGL::SetupShaders() {
// Skip stages that are not enabled
if (!gpu.regs.IsShaderConfigEnabled(index)) {
switch (program) {
case Maxwell::ShaderProgram::Geometry:
shader_program_manager->UseTrivialGeometryShader();
break;
}
continue;
}
@@ -288,11 +293,18 @@ void RasterizerOpenGL::SetupShaders() {
switch (program) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB: {
shader_program_manager->UseProgrammableVertexShader(shader->GetProgramHandle());
shader_program_manager->UseProgrammableVertexShader(
shader->GetProgramHandle(primitive_mode));
break;
}
case Maxwell::ShaderProgram::Geometry: {
shader_program_manager->UseProgrammableGeometryShader(
shader->GetProgramHandle(primitive_mode));
break;
}
case Maxwell::ShaderProgram::Fragment: {
shader_program_manager->UseProgrammableFragmentShader(shader->GetProgramHandle());
shader_program_manager->UseProgrammableFragmentShader(
shader->GetProgramHandle(primitive_mode));
break;
}
default:
@@ -302,12 +314,13 @@ void RasterizerOpenGL::SetupShaders() {
}
// Configure the const buffers for this shader stage.
current_constbuffer_bindpoint = SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage),
shader, current_constbuffer_bindpoint);
current_constbuffer_bindpoint =
SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
current_constbuffer_bindpoint);
// Configure the textures for this shader stage.
current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
current_texture_bindpoint);
primitive_mode, current_texture_bindpoint);
// When VertexA is enabled, we have dual vertex shaders
if (program == Maxwell::ShaderProgram::VertexA) {
@@ -317,8 +330,6 @@ void RasterizerOpenGL::SetupShaders() {
}
state.Apply();
shader_program_manager->UseTrivialGeometryShader();
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -541,6 +552,7 @@ void RasterizerOpenGL::DrawArrays() {
SyncLogicOpState();
SyncCullMode();
SyncAlphaTest();
SyncScissorTest();
SyncTransformFeedback();
SyncPointState();
@@ -580,7 +592,7 @@ void RasterizerOpenGL::DrawArrays() {
SetupVertexArrays();
DrawParameters params = SetupDraw();
SetupShaders();
SetupShaders(params.primitive_mode);
buffer_cache.Unmap();
@@ -719,7 +731,7 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
}
u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader,
u32 current_bindpoint) {
GLenum primitive_mode, u32 current_bindpoint) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
@@ -771,7 +783,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
// Now configure the bindpoint of the buffer inside the shader
glUniformBlockBinding(shader->GetProgramHandle(),
glUniformBlockBinding(shader->GetProgramHandle(primitive_mode),
shader->GetProgramResourceIndex(used_buffer),
current_bindpoint + bindpoint);
@@ -787,7 +799,8 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
return current_bindpoint + static_cast<u32>(entries.size());
}
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, u32 current_unit) {
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_unit) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
@@ -802,8 +815,8 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
// Bind the uniform to the sampler.
glProgramUniform1i(shader->GetProgramHandle(), shader->GetUniformLocation(entry),
current_bindpoint);
glProgramUniform1i(shader->GetProgramHandle(primitive_mode),
shader->GetUniformLocation(entry), current_bindpoint);
const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
@@ -972,6 +985,22 @@ void RasterizerOpenGL::SyncAlphaTest() {
}
}
void RasterizerOpenGL::SyncScissorTest() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
state.scissor.enabled = (regs.scissor_test.enable != 0);
// TODO(Blinkhawk): Figure if the hardware supports scissor testing per viewport and how it's
// implemented.
if (regs.scissor_test.enable != 0) {
const u32 width = regs.scissor_test.max_x - regs.scissor_test.min_x;
const u32 height = regs.scissor_test.max_y - regs.scissor_test.min_y;
state.scissor.x = regs.scissor_test.min_x;
state.scissor.y = regs.scissor_test.min_y;
state.scissor.width = width;
state.scissor.height = height;
}
}
void RasterizerOpenGL::SyncTransformFeedback() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;

View File

@@ -120,7 +120,7 @@ private:
* @returns The next available bindpoint for use in the next shader stage.
*/
u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
u32 current_bindpoint);
GLenum primitive_mode, u32 current_bindpoint);
/*
* Configures the current textures to use for the draw command.
@@ -130,7 +130,7 @@ private:
* @returns The next available bindpoint for use in the next shader stage.
*/
u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
u32 current_unit);
GLenum primitive_mode, u32 current_unit);
/// Syncs the viewport to match the guest state
void SyncViewport();
@@ -165,6 +165,9 @@ private:
/// Syncs the alpha test state to match the guest state
void SyncAlphaTest();
/// Syncs the scissor test state to match the guest state
void SyncScissorTest();
/// Syncs the transform feedback state to match the guest state
void SyncTransformFeedback();
@@ -207,7 +210,7 @@ private:
DrawParameters SetupDraw();
void SetupShaders();
void SetupShaders(GLenum primitive_mode);
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled;

View File

@@ -45,7 +45,9 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
SurfaceParams params{};
params.addr = TryGetCpuAddr(config.tic.Address());
params.is_tiled = config.tic.IsTiled();
params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
params.pixel_format =
PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value());
params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
@@ -97,8 +99,11 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
SurfaceParams params{};
params.addr = TryGetCpuAddr(config.Address());
params.is_tiled = true;
params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
params.is_tiled =
config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
params.block_width = 1 << config.memory_layout.block_width;
params.block_height = 1 << config.memory_layout.block_height;
params.block_depth = 1 << config.memory_layout.block_depth;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
@@ -120,13 +125,16 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
return params;
}
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
Tegra::GPUVAddr zeta_address,
Tegra::DepthFormat format) {
/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
SurfaceParams params{};
params.addr = TryGetCpuAddr(zeta_address);
params.is_tiled = true;
params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
params.block_width = 1 << std::min(block_width, 5U);
params.block_height = 1 << std::min(block_height, 5U);
params.block_depth = 1 << std::min(block_depth, 5U);
params.pixel_format = PixelFormatFromDepthFormat(format);
params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
@@ -148,7 +156,9 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
SurfaceParams params{};
params.addr = TryGetCpuAddr(config.Address());
params.is_tiled = !config.linear;
params.block_height = params.is_tiled ? config.BlockHeight() : 0,
params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0,
params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0,
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
@@ -818,6 +828,11 @@ void CachedSurface::LoadGLBuffer() {
if (params.is_tiled) {
gl_buffer.resize(total_size);
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
params.block_width, static_cast<u32>(params.target));
ASSERT_MSG(params.block_depth == 1, "Block depth is defined as {} on texture type {}",
params.block_depth, static_cast<u32>(params.target));
// TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do
// this for 3D textures, etc.
switch (params.target) {
@@ -989,7 +1004,9 @@ Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
}
SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer(
regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format)};
regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format,
regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
return GetSurface(depth_params, preserve_contents);
}

View File

@@ -716,9 +716,10 @@ struct SurfaceParams {
static SurfaceParams CreateForFramebuffer(std::size_t index);
/// Creates SurfaceParams for a depth buffer configuration
static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
Tegra::GPUVAddr zeta_address,
Tegra::DepthFormat format);
static SurfaceParams CreateForDepthBuffer(
u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
u32 block_width, u32 block_height, u32 block_depth,
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
/// Creates SurfaceParams for a Fermi2D surface copy
static SurfaceParams CreateForFermiCopySurface(
@@ -733,7 +734,9 @@ struct SurfaceParams {
VAddr addr;
bool is_tiled;
u32 block_width;
u32 block_height;
u32 block_depth;
PixelFormat pixel_format;
ComponentType component_type;
SurfaceType type;

View File

@@ -68,6 +68,10 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
program_result = GLShader::GenerateVertexShader(setup);
gl_type = GL_VERTEX_SHADER;
break;
case Maxwell::ShaderProgram::Geometry:
program_result = GLShader::GenerateGeometryShader(setup);
gl_type = GL_GEOMETRY_SHADER;
break;
case Maxwell::ShaderProgram::Fragment:
program_result = GLShader::GenerateFragmentShader(setup);
gl_type = GL_FRAGMENT_SHADER;
@@ -80,11 +84,16 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
entries = program_result.second;
OGLShader shader;
shader.Create(program_result.first.c_str(), gl_type);
program.Create(true, shader.handle);
SetShaderUniformBlockBindings(program.handle);
VideoCore::LabelGLObject(GL_PROGRAM, program.handle, addr);
if (program_type != Maxwell::ShaderProgram::Geometry) {
OGLShader shader;
shader.Create(program_result.first.c_str(), gl_type);
program.Create(true, shader.handle);
SetShaderUniformBlockBindings(program.handle);
VideoCore::LabelGLObject(GL_PROGRAM, program.handle, addr);
} else {
// Store shader's code to lazily build it on draw
geometry_programs.code = program_result.first;
}
}
GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
@@ -110,6 +119,21 @@ GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
return search->second;
}
/// Returns the handle of target_program, compiling the geometry shader first if the program
/// has not been created yet (handle == 0).
/// @param target_program Program slot for one input topology class.
/// @param glsl_topology  GLSL input layout qualifier appended to the stored shader code.
/// @param debug_name     Name passed to the GL object label.
GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program,
                                         const std::string& glsl_topology,
                                         const std::string& debug_name) {
    if (target_program.handle == 0) {
        // The input layout is the only part that differs between topology classes, so it is
        // appended to the shared geometry shader code right before compiling.
        std::string source = geometry_programs.code;
        source += "layout (";
        source += glsl_topology;
        source += ") in;\n";

        OGLShader geometry_shader;
        geometry_shader.Create(source.c_str(), GL_GEOMETRY_SHADER);
        target_program.Create(true, geometry_shader.handle);
        SetShaderUniformBlockBindings(target_program.handle);
        VideoCore::LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
    }
    return target_program.handle;
}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const VAddr program_addr{GetShaderAddress(program)};

View File

@@ -7,6 +7,7 @@
#include <map>
#include <memory>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -38,8 +39,31 @@ public:
}
/// Gets the GL program handle for the shader
GLuint GetProgramHandle() const {
return program.handle;
GLuint GetProgramHandle(GLenum primitive_mode) {
    // Non-geometry shaders own exactly one program; primitive_mode is ignored for them.
    if (program_type != Maxwell::ShaderProgram::Geometry) {
        return program.handle;
    }
    // Geometry shaders need a GLSL input layout that depends on the primitive being drawn, so
    // one program per input topology class is built on first use.
    switch (primitive_mode) {
    case GL_POINTS:
        return LazyGeometryProgram(geometry_programs.points, "points", "ShaderPoints");
    case GL_LINES:
    case GL_LINE_STRIP:
    case GL_LINE_LOOP: // Line loops reach the geometry stage as individual lines as well
        return LazyGeometryProgram(geometry_programs.lines, "lines", "ShaderLines");
    case GL_LINES_ADJACENCY:
    case GL_LINE_STRIP_ADJACENCY:
        return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency",
                                   "ShaderLinesAdjacency");
    case GL_TRIANGLES:
    case GL_TRIANGLE_STRIP:
    case GL_TRIANGLE_FAN:
        return LazyGeometryProgram(geometry_programs.triangles, "triangles", "ShaderTriangles");
    case GL_TRIANGLES_ADJACENCY:
    case GL_TRIANGLE_STRIP_ADJACENCY:
        // Label fixed: this program used to be tagged "ShaderLines" in GL debug output.
        return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency",
                                   "ShaderTrianglesAdjacency");
    default:
        UNREACHABLE_MSG("Unknown primitive mode.");
        // Builds where the assert does not abort must still return a value; fall back to the
        // points program instead of running off the end of the function.
        return LazyGeometryProgram(geometry_programs.points, "points", "ShaderPoints");
    }
}
/// Gets the GL program resource location for the specified resource, caching as needed
@@ -49,12 +73,30 @@ public:
GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);
private:
/// Generates a geometry shader or returns one that already exists.
GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology,
const std::string& debug_name);
VAddr addr;
Maxwell::ShaderProgram program_type;
GLShader::ShaderSetup setup;
GLShader::ShaderEntries entries;
// Non-geometry program.
OGLProgram program;
// Geometry programs. GLSL requires a geometry shader to declare its input topology, but the
// hardware does not declare one. Work around this by keeping the generated code around and
// building a separate program per input topology class the first time it is needed.
struct {
// Generated GLSL without the input layout declaration; it is appended when a program is built.
std::string code;
// Program used for point input.
OGLProgram points;
// Program used for line and line strip input.
OGLProgram lines;
// Program used for line and line strip input with adjacency.
OGLProgram lines_adjacency;
// Program used for triangle, triangle strip and triangle fan input.
OGLProgram triangles;
// Program used for triangle and triangle strip input with adjacency.
OGLProgram triangles_adjacency;
} geometry_programs;
std::map<u32, GLuint> resource_cache;
std::map<u32, GLint> uniform_cache;
};

View File

@@ -7,6 +7,7 @@
#include <string>
#include <string_view>
#include <boost/optional.hpp>
#include <fmt/format.h>
#include "common/assert.h"
@@ -29,11 +30,32 @@ using Tegra::Shader::SubOp;
constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
constexpr u32 MAX_GEOMETRY_BUFFERS = 6;
constexpr u32 MAX_ATTRIBUTES = 0x100; // Size in vec4s, this value is untested
class DecompileFail : public std::runtime_error {
public:
using std::runtime_error::runtime_error;
};
/// Maps a shader header output topology to the matching GLSL geometry output layout qualifier.
/// Unknown topologies are logged, trip the unreachable assert and fall back to "points".
static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
    using Topology = Tegra::Shader::OutputTopology;

    if (topology == Topology::PointList) {
        return "points";
    }
    if (topology == Topology::LineStrip) {
        return "line_strip";
    }
    if (topology == Topology::TriangleStrip) {
        return "triangle_strip";
    }

    LOG_CRITICAL(Render_OpenGL, "Unknown output topology {}", static_cast<u32>(topology));
    UNREACHABLE();
    return "points";
}
/// Describes the behaviour of code path of a given entry point and a return point.
enum class ExitMethod {
Undetermined, ///< Internal value. Only occur when analyzing JMP loop.
@@ -253,8 +275,9 @@ enum class InternalFlag : u64 {
class GLSLRegisterManager {
public:
GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations,
const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix)
: shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix} {
const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix,
const Tegra::Shader::Header& header)
: shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header} {
BuildRegisterList();
BuildInputList();
}
@@ -358,11 +381,13 @@ public:
* @param reg The destination register to use.
* @param elem The element to use for the operation.
* @param attribute The input attribute to use as the source value.
* @param vertex The register that decides which vertex to read from (used in GS).
*/
void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute,
const Tegra::Shader::IpaMode& input_mode) {
const Tegra::Shader::IpaMode& input_mode,
boost::optional<Register> vertex = {}) {
const std::string dest = GetRegisterAsFloat(reg);
const std::string src = GetInputAttribute(attribute, input_mode) + GetSwizzle(elem);
const std::string src = GetInputAttribute(attribute, input_mode, vertex) + GetSwizzle(elem);
shader.AddLine(dest + " = " + src + ';');
}
@@ -391,16 +416,29 @@ public:
* are stored as floats, so this may require conversion.
* @param attribute The destination output attribute.
* @param elem The element to use for the operation.
* @param reg The register to use as the source value.
* @param val_reg The register to use as the source value.
* @param buf_reg The register that tells which buffer to write to (used in geometry shaders).
*/
void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) {
void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& val_reg,
const Register& buf_reg) {
const std::string dest = GetOutputAttribute(attribute);
const std::string src = GetRegisterAsFloat(reg);
const std::string src = GetRegisterAsFloat(val_reg);
if (!dest.empty()) {
// Can happen with unknown/unimplemented output attributes, in which case we ignore the
// instruction for now.
shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
// TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry
// shader. These instructions use a dirty register as buffer index. To avoid some
// drivers from complaining for the out of boundary writes, guard them.
const std::string buf_index{"min(" + GetRegisterAsInteger(buf_reg) + ", " +
std::to_string(MAX_GEOMETRY_BUFFERS - 1) + ')'};
shader.AddLine("amem[" + buf_index + "][" +
std::to_string(static_cast<u32>(attribute)) + ']' +
GetSwizzle(elem) + " = " + src + ';');
} else {
shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
}
}
}
@@ -441,58 +479,18 @@ public:
}
}
/// Add declarations for registers
/// Add declarations.
void GenerateDeclarations(const std::string& suffix) {
for (const auto& reg : regs) {
declarations.AddLine(GLSLRegister::GetTypeString() + ' ' + reg.GetPrefixString() +
std::to_string(reg.GetIndex()) + '_' + suffix + " = 0;");
}
declarations.AddNewLine();
for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) {
const InternalFlag code = static_cast<InternalFlag>(ii);
declarations.AddLine("bool " + GetInternalFlag(code) + " = false;");
}
declarations.AddNewLine();
for (const auto element : declr_input_attribute) {
// TODO(bunnei): Use proper number of elements for these
u32 idx =
static_cast<u32>(element.first) - static_cast<u32>(Attribute::Index::Attribute_0);
declarations.AddLine("layout(location = " + std::to_string(idx) + ")" +
GetInputFlags(element.first) + "in vec4 " +
GetInputAttribute(element.first, element.second) + ';');
}
declarations.AddNewLine();
for (const auto& index : declr_output_attribute) {
// TODO(bunnei): Use proper number of elements for these
declarations.AddLine("layout(location = " +
std::to_string(static_cast<u32>(index) -
static_cast<u32>(Attribute::Index::Attribute_0)) +
") out vec4 " + GetOutputAttribute(index) + ';');
}
declarations.AddNewLine();
for (const auto& entry : GetConstBuffersDeclarations()) {
declarations.AddLine("layout(std140) uniform " + entry.GetName());
declarations.AddLine('{');
declarations.AddLine(" vec4 c" + std::to_string(entry.GetIndex()) +
"[MAX_CONSTBUFFER_ELEMENTS];");
declarations.AddLine("};");
declarations.AddNewLine();
}
declarations.AddNewLine();
const auto& samplers = GetSamplers();
for (const auto& sampler : samplers) {
declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() +
';');
}
declarations.AddNewLine();
GenerateRegisters(suffix);
GenerateInternalFlags();
GenerateInputAttrs();
GenerateOutputAttrs();
GenerateConstBuffers();
GenerateSamplers();
GenerateGeometry();
}
/// Returns a list of constant buffer declarations
/// Returns a list of constant buffer declarations.
std::vector<ConstBufferEntry> GetConstBuffersDeclarations() const {
std::vector<ConstBufferEntry> result;
std::copy_if(declr_const_buffers.begin(), declr_const_buffers.end(),
@@ -500,7 +498,7 @@ public:
return result;
}
/// Returns a list of samplers used in the shader
/// Returns a list of samplers used in the shader.
const std::vector<SamplerEntry>& GetSamplers() const {
return used_samplers;
}
@@ -509,7 +507,7 @@ public:
/// necessary.
std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
bool is_array, bool is_shadow) {
const std::size_t offset = static_cast<std::size_t>(sampler.index.Value());
const auto offset = static_cast<std::size_t>(sampler.index.Value());
// If this sampler has already been used, return the existing mapping.
const auto itr =
@@ -530,6 +528,129 @@ public:
}
private:
/// Emits one zero-initialised declaration per register, followed by a blank line.
/// @param suffix Appended (after an underscore) to every register name.
void GenerateRegisters(const std::string& suffix) {
    for (const auto& reg : regs) {
        // Shape of the emitted line: "<type> <prefix><index>_<suffix> = 0;"
        std::string line = GLSLRegister::GetTypeString();
        line += ' ';
        line += reg.GetPrefixString();
        line += std::to_string(reg.GetIndex());
        line += '_';
        line += suffix;
        line += " = 0;";
        declarations.AddLine(line);
    }
    declarations.AddNewLine();
}
/// Generates declarations for internal flags.
void GenerateInternalFlags() {
for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) {
const InternalFlag code = static_cast<InternalFlag>(ii);
declarations.AddLine("bool " + GetInternalFlag(code) + " = false;");
}
declarations.AddNewLine();
}
/// Generates declarations for input attributes.
void GenerateInputAttrs() {
if (stage != Maxwell3D::Regs::ShaderStage::Vertex) {
const std::string attr =
stage == Maxwell3D::Regs::ShaderStage::Geometry ? "gs_position[]" : "position";
declarations.AddLine("layout (location = " + std::to_string(POSITION_VARYING_LOCATION) +
") in vec4 " + attr + ';');
}
for (const auto element : declr_input_attribute) {
// TODO(bunnei): Use proper number of elements for these
u32 idx =
static_cast<u32>(element.first) - static_cast<u32>(Attribute::Index::Attribute_0);
if (stage != Maxwell3D::Regs::ShaderStage::Vertex) {
// If inputs are varyings, add an offset
idx += GENERIC_VARYING_START_LOCATION;
}
std::string attr{GetInputAttribute(element.first, element.second)};
if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
attr = "gs_" + attr + "[]";
}
declarations.AddLine("layout (location = " + std::to_string(idx) + ") " +
GetInputFlags(element.first) + "in vec4 " + attr + ';');
}
declarations.AddNewLine();
}
/// Generates declarations for output attributes.
void GenerateOutputAttrs() {
if (stage != Maxwell3D::Regs::ShaderStage::Fragment) {
declarations.AddLine("layout (location = " + std::to_string(POSITION_VARYING_LOCATION) +
") out vec4 position;");
}
for (const auto& index : declr_output_attribute) {
// TODO(bunnei): Use proper number of elements for these
const u32 idx = static_cast<u32>(index) -
static_cast<u32>(Attribute::Index::Attribute_0) +
GENERIC_VARYING_START_LOCATION;
declarations.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " +
GetOutputAttribute(index) + ';');
}
declarations.AddNewLine();
}
/// Emits one std140 uniform block per constant buffer returned by
/// GetConstBuffersDeclarations(). Each block is followed by a blank line and the whole list by
/// one more.
void GenerateConstBuffers() {
    const auto const_buffers = GetConstBuffersDeclarations();
    for (const auto& buffer : const_buffers) {
        const std::string array_name = 'c' + std::to_string(buffer.GetIndex());

        declarations.AddLine("layout (std140) uniform " + buffer.GetName());
        declarations.AddLine('{');
        declarations.AddLine("    vec4 " + array_name + "[MAX_CONSTBUFFER_ELEMENTS];");
        declarations.AddLine("};");
        declarations.AddNewLine();
    }
    declarations.AddNewLine();
}
/// Generates declarations for samplers.
void GenerateSamplers() {
const auto& samplers = GetSamplers();
for (const auto& sampler : samplers) {
declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() +
';');
}
declarations.AddNewLine();
}
/// Generates declarations used for geometry shaders.
void GenerateGeometry() {
if (stage != Maxwell3D::Regs::ShaderStage::Geometry)
return;
declarations.AddLine(
"layout (" + GetTopologyName(header.common3.output_topology) +
", max_vertices = " + std::to_string(header.common4.max_output_vertices) + ") out;");
declarations.AddNewLine();
declarations.AddLine("vec4 amem[" + std::to_string(MAX_GEOMETRY_BUFFERS) + "][" +
std::to_string(MAX_ATTRIBUTES) + "];");
declarations.AddNewLine();
constexpr char buffer[] = "amem[output_buffer]";
declarations.AddLine("void emit_vertex(uint output_buffer) {");
++declarations.scope;
for (const auto element : declr_output_attribute) {
declarations.AddLine(GetOutputAttribute(element) + " = " + buffer + '[' +
std::to_string(static_cast<u32>(element)) + "];");
}
declarations.AddLine("position = " + std::string(buffer) + '[' +
std::to_string(static_cast<u32>(Attribute::Index::Position)) + "];");
// If a geometry shader is attached, it will always flip (it's the last stage before
// fragment). For more info about flipping, refer to gl_shader_gen.cpp.
declarations.AddLine("position.xy *= viewport_flip.xy;");
declarations.AddLine("gl_Position = position;");
declarations.AddLine("position.w = 1.0;");
declarations.AddLine("EmitVertex();");
--declarations.scope;
declarations.AddLine('}');
declarations.AddNewLine();
}
/// Generates code representing a temporary (GPR) register.
std::string GetRegister(const Register& reg, unsigned elem) {
if (reg == Register::ZeroIndex) {
@@ -586,11 +707,19 @@ private:
/// Generates code representing an input attribute register.
std::string GetInputAttribute(Attribute::Index attribute,
const Tegra::Shader::IpaMode& input_mode) {
const Tegra::Shader::IpaMode& input_mode,
boost::optional<Register> vertex = {}) {
auto GeometryPass = [&](const std::string& name) {
if (stage == Maxwell3D::Regs::ShaderStage::Geometry && vertex) {
return "gs_" + name + '[' + GetRegisterAsInteger(vertex.value(), 0, false) + ']';
}
return name;
};
switch (attribute) {
case Attribute::Index::Position:
if (stage != Maxwell3D::Regs::ShaderStage::Fragment) {
return "position";
return GeometryPass("position");
} else {
return "vec4(gl_FragCoord.x, gl_FragCoord.y, gl_FragCoord.z, 1.0)";
}
@@ -619,7 +748,7 @@ private:
UNREACHABLE();
}
}
return "input_attribute_" + std::to_string(index);
return GeometryPass("input_attribute_" + std::to_string(index));
}
LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", static_cast<u32>(attribute));
@@ -672,7 +801,7 @@ private:
return out;
}
/// Generates code representing an output attribute register.
/// Generates code representing the declaration name of an output attribute register.
std::string GetOutputAttribute(Attribute::Index attribute) {
switch (attribute) {
case Attribute::Index::Position:
@@ -708,6 +837,7 @@ private:
std::vector<SamplerEntry> used_samplers;
const Maxwell3D::Regs::ShaderStage& stage;
const std::string& suffix;
const Tegra::Shader::Header& header;
};
class GLSLGenerator {
@@ -1103,8 +1233,8 @@ private:
return offset + 1;
}
shader.AddLine("// " + std::to_string(offset) + ": " + opcode->GetName() + " (" +
std::to_string(instr.value) + ')');
shader.AddLine(
fmt::format("// {}: {} (0x{:016x})", offset, opcode->GetName(), instr.value));
using Tegra::Shader::Pred;
ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute,
@@ -1826,7 +1956,7 @@ private:
const auto LoadNextElement = [&](u32 reg_offset) {
regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
static_cast<Attribute::Index>(next_index),
input_mode);
input_mode, instr.gpr39.Value());
// Load the next attribute element into the following register. If the element
// to load goes beyond the vec4 size, load the first element of the next
@@ -1890,8 +2020,8 @@ private:
const auto StoreNextElement = [&](u32 reg_offset) {
regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
next_element,
instr.gpr0.Value() + reg_offset);
next_element, instr.gpr0.Value() + reg_offset,
instr.gpr39.Value());
// Load the next attribute element into the following register. If the element
// to load goes beyond the vec4 size, load the first element of the next
@@ -2734,6 +2864,52 @@ private:
break;
}
case OpCode::Id::OUT_R: {
ASSERT(instr.gpr20.Value() == Register::ZeroIndex);
ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry,
"OUT is expected to be used in a geometry shader.");
if (instr.out.emit) {
// gpr0 is used to store the next address. Hardware returns a pointer but
// we just return the next index with a cyclic cap.
const std::string current{regs.GetRegisterAsInteger(instr.gpr8, 0, false)};
const std::string next = "((" + current + " + 1" + ") % " +
std::to_string(MAX_GEOMETRY_BUFFERS) + ')';
shader.AddLine("emit_vertex(" + current + ");");
regs.SetRegisterToInteger(instr.gpr0, false, 0, next, 1, 1);
}
if (instr.out.cut) {
shader.AddLine("EndPrimitive();");
}
break;
}
case OpCode::Id::MOV_SYS: {
switch (instr.sys20) {
case Tegra::Shader::SystemVariable::InvocationInfo: {
LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1);
break;
}
default: {
LOG_CRITICAL(HW_GPU, "Unhandled system move: {}",
static_cast<u32>(instr.sys20.Value()));
UNREACHABLE();
}
}
break;
}
case OpCode::Id::ISBERD: {
ASSERT(instr.isberd.o == 0);
ASSERT(instr.isberd.skew == 0);
ASSERT(instr.isberd.shift == Tegra::Shader::IsberdShift::None);
ASSERT(instr.isberd.mode == Tegra::Shader::IsberdMode::None);
ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry,
"ISBERD is expected to be used in a geometry shader.");
LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
regs.SetRegisterToFloat(instr.gpr0, 0, regs.GetRegisterAsFloat(instr.gpr8), 1, 1);
break;
}
case OpCode::Id::BRA: {
ASSERT_MSG(instr.bra.constant_buffer == 0,
"BRA with constant buffers are not implemented");
@@ -2777,6 +2953,88 @@ private:
LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
break;
}
case OpCode::Id::VMAD: {
// VMAD: builds the GLSL expression (a * b + c), optionally shifted right by 7 or
// 15, and writes it to gpr0. Operands a and b are sub-fields (a byte or a 16-bit
// half) unpacked from their source registers; b may instead be an immediate.
const bool signed_a = instr.vmad.signed_a == 1;
const bool signed_b = instr.vmad.signed_b == 1;
// The result is treated as signed as soon as either multiplicand is signed.
const bool result_signed = signed_a || signed_b;
// Set by Unpack when an operand uses VmadType::Invalid; when present it replaces
// the whole computed expression.
boost::optional<std::string> forced_result;
// Returns a GLSL expression that extracts the selected part of `op`.
//  - is_chunk == false: the byte at index byte_height (shift by byte_height * 8,
//    mask with 0xff).
//  - is_chunk == true: the part selected by `type` (low/high 16 bits).
// When is_signed is set, the extracted expression is wrapped in an int() cast.
auto Unpack = [&](const std::string& op, bool is_chunk, bool is_signed,
Tegra::Shader::VmadType type, u64 byte_height) {
const std::string value = [&]() {
if (!is_chunk) {
const auto offset = static_cast<u32>(byte_height * 8);
return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)";
}
const std::string zero = "0";
switch (type) {
case Tegra::Shader::VmadType::Size16_Low:
return '(' + op + " & 0xffff)";
case Tegra::Shader::VmadType::Size16_High:
return '(' + op + " >> 16)";
case Tegra::Shader::VmadType::Size32:
// TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when
// this type is used (1 * 1 + 0 == 0x5b800000). Until a better
// explanation is found: assert.
UNREACHABLE_MSG("Unimplemented");
return zero;
case Tegra::Shader::VmadType::Invalid:
// Note(Rodrigo): This flag is invalid according to nvdisasm. From my
// testing (even though it's invalid) this makes the whole instruction
// assign zero to target register.
forced_result = boost::make_optional(zero);
return zero;
default:
UNREACHABLE();
return zero;
}
}();
if (is_signed) {
return "int(" + value + ')';
}
return value;
};
// Operand a always comes from gpr8.
const std::string op_a = Unpack(regs.GetRegisterAsInteger(instr.gpr8, 0, false),
instr.vmad.is_byte_chunk_a != 0, signed_a,
instr.vmad.type_a, instr.vmad.byte_height_a);
// Operand b comes from gpr20 or from the instruction's 16-bit immediate.
std::string op_b;
if (instr.vmad.use_register_b) {
op_b = Unpack(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
instr.vmad.is_byte_chunk_b != 0, signed_b, instr.vmad.type_b,
instr.vmad.byte_height_b);
} else {
// NOTE(review): both branches of the ternary are converted to a common type
// before std::to_string; if GetImm20_16() returns an unsigned type, a
// negative s16 would be printed as a large unsigned value -- confirm.
op_b = '(' +
std::to_string(signed_b ? static_cast<s16>(instr.alu.GetImm20_16())
: instr.alu.GetImm20_16()) +
')';
}
// Addend c comes from gpr39, read with the result's signedness.
const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed);
std::string result;
if (forced_result) {
result = *forced_result;
} else {
result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
// Only Shr7 and Shr15 add a shift; any other value leaves result untouched.
switch (instr.vmad.shr) {
case Tegra::Shader::VmadShr::Shr7:
result = '(' + result + " >> 7)";
break;
case Tegra::Shader::VmadShr::Shr15:
result = '(' + result + " >> 15)";
break;
}
}
// Write the final expression to gpr0, forwarding the instruction's saturate flag
// and cc field to the register manager.
regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
instr.vmad.saturate == 1, 0, Register::Size::Word,
instr.vmad.cc);
break;
}
default: {
LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
UNREACHABLE();
@@ -2907,7 +3165,7 @@ private:
ShaderWriter shader;
ShaderWriter declarations;
GLSLRegisterManager regs{shader, declarations, stage, suffix};
GLSLRegisterManager regs{shader, declarations, stage, suffix, header};
// Declarations
std::set<std::string> declr_predicates;

View File

@@ -17,7 +17,18 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += Decompiler::GetCommonDeclarations();
out += "bool exec_vertex();\n";
out += R"(
out gl_PerVertex {
vec4 gl_Position;
};
layout(std140) uniform vs_config {
vec4 viewport_flip;
uvec4 instance_id;
uvec4 flip_stage;
};
)";
if (setup.IsDualProgram()) {
out += "bool exec_vertex_b();\n";
@@ -28,19 +39,18 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
Maxwell3D::Regs::ShaderStage::Vertex, "vertex")
.get_value_or({});
out += program.first;
if (setup.IsDualProgram()) {
ProgramResult program_b =
Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b")
.get_value_or({});
out += program_b.first;
}
out += R"(
out gl_PerVertex {
vec4 gl_Position;
};
out vec4 position;
layout (std140) uniform vs_config {
vec4 viewport_flip;
uvec4 instance_id;
};
void main() {
position = vec4(0.0, 0.0, 0.0, 0.0);
exec_vertex();
@@ -52,27 +62,52 @@ void main() {
out += R"(
// Viewport can be flipped, which is unsupported by glViewport
position.xy *= viewport_flip.xy;
// Check if the flip stage is VertexB
if (flip_stage[0] == 1) {
// Viewport can be flipped, which is unsupported by glViewport
position.xy *= viewport_flip.xy;
}
gl_Position = position;
// TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0
// For now, this is here to bring order in lieu of proper emulation
position.w = 1.0;
if (flip_stage[0] == 1) {
position.w = 1.0;
}
}
)";
return {out, program.second};
}
ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += Decompiler::GetCommonDeclarations();
out += "bool exec_geometry();\n";
ProgramResult program =
Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Geometry, "geometry")
.get_value_or({});
out += R"(
out gl_PerVertex {
vec4 gl_Position;
};
layout (std140) uniform gs_config {
vec4 viewport_flip;
uvec4 instance_id;
uvec4 flip_stage;
};
void main() {
exec_geometry();
}
)";
out += program.first;
if (setup.IsDualProgram()) {
ProgramResult program_b =
Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b")
.get_value_or({});
out += program_b.first;
}
return {out, program.second};
}
@@ -87,7 +122,6 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
Maxwell3D::Regs::ShaderStage::Fragment, "fragment")
.get_value_or({});
out += R"(
in vec4 position;
layout(location = 0) out vec4 FragColor0;
layout(location = 1) out vec4 FragColor1;
layout(location = 2) out vec4 FragColor2;
@@ -100,6 +134,7 @@ layout(location = 7) out vec4 FragColor7;
layout (std140) uniform fs_config {
vec4 viewport_flip;
uvec4 instance_id;
uvec4 flip_stage;
};
void main() {
@@ -110,5 +145,4 @@ void main() {
out += program.first;
return {out, program.second};
}
} // namespace OpenGL::GLShader
} // namespace OpenGL::GLShader

View File

@@ -195,6 +195,12 @@ private:
*/
ProgramResult GenerateVertexShader(const ShaderSetup& setup);
/**
 * Generates the GLSL geometry shader program source code for the given GS program
 * @param setup Shader setup holding the program code to decompile
 * @returns ProgramResult whose first member is the generated GLSL source string; the second
 *          member is forwarded from the decompiler's result
 */
ProgramResult GenerateGeometryShader(const ShaderSetup& setup);
/**
* Generates the GLSL fragment shader program source code for the given FS program
* @returns String of the shader source code

View File

@@ -18,6 +18,14 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh
// We only assign the instance to the first component of the vector, the rest is just padding.
instance_id[0] = state.current_instance;
// Assign in which stage the position has to be flipped
// (the last stage before the fragment shader).
if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) {
flip_stage[0] = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
} else {
flip_stage[0] = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
}
}
} // namespace OpenGL::GLShader

View File

@@ -21,8 +21,9 @@ struct MaxwellUniformData {
void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
alignas(16) GLvec4 viewport_flip;
alignas(16) GLuvec4 instance_id;
alignas(16) GLuvec4 flip_stage;
};
static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) == 48, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
@@ -36,6 +37,10 @@ public:
vs = program;
}
/// Selects the given GL program handle as the geometry shader (stored in gs).
void UseProgrammableGeometryShader(GLuint program) {
gs = program;
}
void UseProgrammableFragmentShader(GLuint program) {
fs = program;
}

View File

@@ -161,7 +161,9 @@ struct TICEntry {
BitField<21, 3, TICHeaderVersion> header_version;
};
union {
BitField<0, 3, u32> block_width;
BitField<3, 3, u32> block_height;
BitField<6, 3, u32> block_depth;
// High 16 bits of the pitch value
BitField<0, 16, u32> pitch_high;
@@ -202,13 +204,24 @@ struct TICEntry {
return depth_minus_1 + 1;
}
/// Returns the decoded block width (1 << block_width). Asserts that the texture is tiled.
u32 BlockWidth() const {
ASSERT(IsTiled());
// The block width is stored in log2 format.
return 1 << block_width;
}
u32 BlockHeight() const {
ASSERT(header_version == TICHeaderVersion::BlockLinear ||
header_version == TICHeaderVersion::BlockLinearColorKey);
ASSERT(IsTiled());
// The block height is stored in log2 format.
return 1 << block_height;
}
/// Returns the decoded block depth (1 << block_depth). Asserts that the texture is tiled.
u32 BlockDepth() const {
ASSERT(IsTiled());
// The block depth is stored in log2 format.
return 1 << block_depth;
}
bool IsTiled() const {
return header_version == TICHeaderVersion::BlockLinear ||
header_version == TICHeaderVersion::BlockLinearColorKey;

View File

@@ -169,16 +169,20 @@ static void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr,
const std::string nice_addr = fmt::format("0x{:016x}", addr);
std::string object_label;
switch (identifier) {
case GL_TEXTURE:
object_label = extra_info + "@" + nice_addr;
break;
case GL_PROGRAM:
object_label = "ShaderProgram@" + nice_addr;
break;
default:
object_label = fmt::format("Object(0x{:x})@{}", identifier, nice_addr);
break;
if (extra_info.empty()) {
switch (identifier) {
case GL_TEXTURE:
object_label = "Texture@" + nice_addr;
break;
case GL_PROGRAM:
object_label = "Shader@" + nice_addr;
break;
default:
object_label = fmt::format("Object(0x{:x})@{}", identifier, nice_addr);
break;
}
} else {
object_label = extra_info + '@' + nice_addr;
}
glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
}