patch_manager: Move non-Program RomFS patch log to Debug

Program-type RomFS patches are still logged at Info level to aid in debugging; for all other types (mainly Control), the message was moved to Debug level.
content_archive: Move get key log to Trace level
2018-10-12 23:27:19 -04:00 · 2018-10-12 23:25:59 -04:00 · 2018-10-12 12:08:08 -04:00 · 2018-10-12 12:07:14 -04:00 · 2018-10-12 12:06:51 -04:00 · 2018-10-12 12:06:22 -04:00
25 changed files with 1026 additions and 220 deletions
--- a/src/core/file_sys/content_archive.cpp
+++ b/src/core/file_sys/content_archive.cpp
@@ -133,7 +133,7 @@ boost::optional<Core::Crypto::Key128> NCA::GetKeyAreaKey(NCASectionCryptoType ty
                     static_cast<u8>(type));
    u128 out_128{};
    memcpy(out_128.data(), out.data(), 16);
-    LOG_DEBUG(Crypto, "called with crypto_rev={:02X}, kak_index={:02X}, key={:016X}{:016X}",
+    LOG_TRACE(Crypto, "called with crypto_rev={:02X}, kak_index={:02X}, key={:016X}{:016X}",
              master_key_id, header.key_index, out_128[1], out_128[0]);

    return out;
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -214,8 +214,14 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t

 VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, ContentRecordType type,
                                     VirtualFile update_raw) const {
-    LOG_INFO(Loader, "Patching RomFS for title_id={:016X}, type={:02X}", title_id,
-             static_cast<u8>(type));
+    // Own the formatted text; .c_str() on the fmt::format temporary would dangle.
+    const auto log_string = fmt::format("Patching RomFS for title_id={:016X}, type={:02X}",
+                                        title_id, static_cast<u8>(type));
+
+    if (type == ContentRecordType::Program)
+        LOG_INFO(Loader, "{}", log_string);
+    else
+        LOG_DEBUG(Loader, "{}", log_string);

    if (romfs == nullptr)
        return romfs;
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -22,6 +22,7 @@ enum {
    HandleTableFull = 105,
    InvalidMemoryState = 106,
    InvalidMemoryPermissions = 108,
+    InvalidMemoryRange = 110,
    InvalidThreadPriority = 112,
    InvalidProcessorId = 113,
    InvalidHandle = 114,
@@ -56,6 +57,7 @@ constexpr ResultCode ERR_INVALID_ADDRESS(ErrorModule::Kernel, ErrCodes::InvalidA
 constexpr ResultCode ERR_INVALID_ADDRESS_STATE(ErrorModule::Kernel, ErrCodes::InvalidMemoryState);
 constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS(ErrorModule::Kernel,
                                                    ErrCodes::InvalidMemoryPermissions);
+constexpr ResultCode ERR_INVALID_MEMORY_RANGE(ErrorModule::Kernel, ErrCodes::InvalidMemoryRange);
 constexpr ResultCode ERR_INVALID_HANDLE(ErrorModule::Kernel, ErrCodes::InvalidHandle);
 constexpr ResultCode ERR_INVALID_PROCESSOR_ID(ErrorModule::Kernel, ErrCodes::InvalidProcessorId);
 constexpr ResultCode ERR_INVALID_SIZE(ErrorModule::Kernel, ErrCodes::InvalidSize);
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -39,6 +39,73 @@ namespace {
 constexpr bool Is4KBAligned(VAddr address) {
    return (address & 0xFFF) == 0;
 }
+
+// Returns true when the end of the range (address + size) lies strictly above its start.
+// The result is false when the given size is zero, or when adding the size to the
+// address overflows the 64-bit type and wraps around.
+constexpr bool IsValidAddressRange(VAddr address, u64 size) {
+    return address < address + size;
+}
+
+// Tests whether the range starting at address and spanning size bytes lies within a larger range.
+constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
+                                    VAddr address_range_end) {
+    const VAddr last_address = address + size - 1;
+    return address >= address_range_begin && last_address <= address_range_end - 1;
+}
+
+bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
+    return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
+                                vm.GetAddressSpaceEndAddress());
+}
+
+bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
+    return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
+                                vm.GetNewMapRegionEndAddress());
+}
+
+// Performs the sanity checks shared by svcMapMemory and svcUnmapMemory, which both
+// validate their arguments in the same order. Returns RESULT_SUCCESS when every check
+// passes, otherwise the error code belonging to the first check that fails.
+ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_addr, VAddr src_addr,
+                                      u64 size) {
+    if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Is4KBAligned(size)) {
+        return ERR_INVALID_SIZE;
+    }
+
+    // Neither range may overflow the address type when the size is added to its start.
+    if (!IsValidAddressRange(dst_addr, size) || !IsValidAddressRange(src_addr, size)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!IsInsideAddressSpace(vm_manager, src_addr, size)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) {
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    // The destination range must not overlap the heap region.
+    const VAddr dst_end_address = dst_addr + size;
+    if (dst_end_address > vm_manager.GetHeapRegionBaseAddress() &&
+        dst_addr < vm_manager.GetHeapRegionEndAddress()) {
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    // The destination range must not overlap the map region either. Both bounds have to
+    // come from the same region; pairing the new-map base with the map end was a typo.
+    if (dst_end_address > vm_manager.GetMapRegionBaseAddress() &&
+        dst_addr < vm_manager.GetMapRegionEndAddress()) {
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return RESULT_SUCCESS;
+}
 } // Anonymous namespace

 /// Set the process heap to a given Size. It can both extend and shrink the heap.
@@ -69,15 +136,15 @@ static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
    LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
              src_addr, size);

-    if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
-        return ERR_INVALID_ADDRESS;
+    auto* const current_process = Core::CurrentProcess();
+    const auto& vm_manager = current_process->VMManager();
+
+    const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size);
+    if (result != RESULT_SUCCESS) {
+        return result;
    }

-    if (size == 0 || !Is4KBAligned(size)) {
-        return ERR_INVALID_SIZE;
-    }
-
-    return Core::CurrentProcess()->MirrorMemory(dst_addr, src_addr, size);
+    return current_process->MirrorMemory(dst_addr, src_addr, size);
 }

 /// Unmaps a region that was previously mapped with svcMapMemory
@@ -85,15 +152,15 @@ static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
    LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
              src_addr, size);

-    if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
-        return ERR_INVALID_ADDRESS;
+    auto* const current_process = Core::CurrentProcess();
+    const auto& vm_manager = current_process->VMManager();
+
+    const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size);
+    if (result != RESULT_SUCCESS) {
+        return result;
    }

-    if (size == 0 || !Is4KBAligned(size)) {
-        return ERR_INVALID_SIZE;
-    }
-
-    return Core::CurrentProcess()->UnmapMemory(dst_addr, src_addr, size);
+    return current_process->UnmapMemory(dst_addr, src_addr, size);
 }

 /// Connect to an OS service given the port name, returns the handle to the port to out
@@ -303,15 +370,15 @@ static ResultCode ArbitrateUnlock(VAddr mutex_addr) {

 struct BreakReason {
    union {
-        u64 raw;
-        BitField<31, 1, u64> dont_kill_application;
+        u32 raw;
+        BitField<31, 1, u32> signal_debugger;
    };
 };

 /// Break program execution
-static void Break(u64 reason, u64 info1, u64 info2) {
+static void Break(u32 reason, u64 info1, u64 info2) {
    BreakReason break_reason{reason};
-    if (break_reason.dont_kill_application) {
+    if (break_reason.signal_debugger) {
        LOG_ERROR(
            Debug_Emulated,
            "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}",
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -35,18 +35,18 @@ void SvcWrap() {

 template <ResultCode func(u32)>
 void SvcWrap() {
-    FuncReturn(func((u32)Param(0)).raw);
+    FuncReturn(func(static_cast<u32>(Param(0))).raw);
 }

 template <ResultCode func(u32, u32)>
 void SvcWrap() {
-    FuncReturn(func((u32)Param(0), (u32)Param(1)).raw);
+    FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1))).raw);
 }

 template <ResultCode func(u32*, u32)>
 void SvcWrap() {
    u32 param_1 = 0;
-    u32 retval = func(&param_1, (u32)Param(1)).raw;
+    u32 retval = func(&param_1, static_cast<u32>(Param(1))).raw;
    Core::CurrentArmInterface().SetReg(1, param_1);
    FuncReturn(retval);
 }
@@ -61,7 +61,7 @@ void SvcWrap() {

 template <ResultCode func(u64, s32)>
 void SvcWrap() {
-    FuncReturn(func(Param(0), (s32)Param(1)).raw);
+    FuncReturn(func(Param(0), static_cast<s32>(Param(1))).raw);
 }

 template <ResultCode func(u64, u32)>
@@ -79,19 +79,19 @@ void SvcWrap() {

 template <ResultCode func(u32, u64)>
 void SvcWrap() {
-    FuncReturn(func((u32)(Param(0) & 0xFFFFFFFF), Param(1)).raw);
+    FuncReturn(func(static_cast<u32>(Param(0)), Param(1)).raw);
 }

 template <ResultCode func(u32, u32, u64)>
 void SvcWrap() {
-    FuncReturn(func((u32)(Param(0) & 0xFFFFFFFF), (u32)(Param(1) & 0xFFFFFFFF), Param(2)).raw);
+    FuncReturn(func(static_cast<u32>(Param(0)), static_cast<u32>(Param(1)), Param(2)).raw);
 }

 template <ResultCode func(u32, u32*, u64*)>
 void SvcWrap() {
    u32 param_1 = 0;
    u64 param_2 = 0;
-    ResultCode retval = func((u32)(Param(2) & 0xFFFFFFFF), &param_1, &param_2);
+    ResultCode retval = func(static_cast<u32>(Param(2)), &param_1, &param_2);
    Core::CurrentArmInterface().SetReg(1, param_1);
    Core::CurrentArmInterface().SetReg(2, param_2);
    FuncReturn(retval.raw);
@@ -100,12 +100,12 @@ void SvcWrap() {
 template <ResultCode func(u64, u64, u32, u32)>
 void SvcWrap() {
    FuncReturn(
-        func(Param(0), Param(1), (u32)(Param(3) & 0xFFFFFFFF), (u32)(Param(3) & 0xFFFFFFFF)).raw);
+        func(Param(0), Param(1), static_cast<u32>(Param(2)), static_cast<u32>(Param(3))).raw);
 }

 template <ResultCode func(u32, u64, u32)>
 void SvcWrap() {
-    FuncReturn(func((u32)Param(0), Param(1), (u32)Param(2)).raw);
+    FuncReturn(func(static_cast<u32>(Param(0)), Param(1), static_cast<u32>(Param(2))).raw);
 }

 template <ResultCode func(u64, u64, u64)>
@@ -115,25 +115,28 @@ void SvcWrap() {

 template <ResultCode func(u32, u64, u64, u32)>
 void SvcWrap() {
-    FuncReturn(func((u32)Param(0), Param(1), Param(2), (u32)Param(3)).raw);
+    FuncReturn(
+        func(static_cast<u32>(Param(0)), Param(1), Param(2), static_cast<u32>(Param(3))).raw);
 }

 template <ResultCode func(u32, u64, u64)>
 void SvcWrap() {
-    FuncReturn(func((u32)Param(0), Param(1), Param(2)).raw);
+    FuncReturn(func(static_cast<u32>(Param(0)), Param(1), Param(2)).raw);
 }

 template <ResultCode func(u32*, u64, u64, s64)>
 void SvcWrap() {
    u32 param_1 = 0;
-    ResultCode retval = func(&param_1, Param(1), (u32)(Param(2) & 0xFFFFFFFF), (s64)Param(3));
+    ResultCode retval =
+        func(&param_1, Param(1), static_cast<u32>(Param(2)), static_cast<s64>(Param(3)));
    Core::CurrentArmInterface().SetReg(1, param_1);
    FuncReturn(retval.raw);
 }

 template <ResultCode func(u64, u64, u32, s64)>
 void SvcWrap() {
-    FuncReturn(func(Param(0), Param(1), (u32)Param(2), (s64)Param(3)).raw);
+    FuncReturn(
+        func(Param(0), Param(1), static_cast<u32>(Param(2)), static_cast<s64>(Param(3))).raw);
 }

 template <ResultCode func(u64*, u64, u64, u64)>
@@ -147,9 +150,9 @@ void SvcWrap() {
 template <ResultCode func(u32*, u64, u64, u64, u32, s32)>
 void SvcWrap() {
    u32 param_1 = 0;
-    u32 retval =
-        func(&param_1, Param(1), Param(2), Param(3), (u32)Param(4), (s32)(Param(5) & 0xFFFFFFFF))
-            .raw;
+    u32 retval = func(&param_1, Param(1), Param(2), Param(3), static_cast<u32>(Param(4)),
+                      static_cast<s32>(Param(5)))
+                     .raw;
    Core::CurrentArmInterface().SetReg(1, param_1);
    FuncReturn(retval);
 }
@@ -172,7 +175,7 @@ void SvcWrap() {
 template <ResultCode func(u32*, u64, u64, u32)>
 void SvcWrap() {
    u32 param_1 = 0;
-    u32 retval = func(&param_1, Param(1), Param(2), (u32)(Param(3) & 0xFFFFFFFF)).raw;
+    u32 retval = func(&param_1, Param(1), Param(2), static_cast<u32>(Param(3))).raw;
    Core::CurrentArmInterface().SetReg(1, param_1);
    FuncReturn(retval);
 }
@@ -181,22 +184,22 @@ template <ResultCode func(Handle*, u64, u32, u32)>
 void SvcWrap() {
    u32 param_1 = 0;
    u32 retval =
-        func(&param_1, Param(1), (u32)(Param(2) & 0xFFFFFFFF), (u32)(Param(3) & 0xFFFFFFFF)).raw;
+        func(&param_1, Param(1), static_cast<u32>(Param(2)), static_cast<u32>(Param(3))).raw;
    Core::CurrentArmInterface().SetReg(1, param_1);
    FuncReturn(retval);
 }

 template <ResultCode func(u64, u32, s32, s64)>
 void SvcWrap() {
-    FuncReturn(
-        func(Param(0), (u32)(Param(1) & 0xFFFFFFFF), (s32)(Param(2) & 0xFFFFFFFF), (s64)Param(3))
-            .raw);
+    FuncReturn(func(Param(0), static_cast<u32>(Param(1)), static_cast<s32>(Param(2)),
+                    static_cast<s64>(Param(3)))
+                   .raw);
 }

 template <ResultCode func(u64, u32, s32, s32)>
 void SvcWrap() {
-    FuncReturn(func(Param(0), (u32)(Param(1) & 0xFFFFFFFF), (s32)(Param(2) & 0xFFFFFFFF),
-                    (s32)(Param(3) & 0xFFFFFFFF))
+    FuncReturn(func(Param(0), static_cast<u32>(Param(1)), static_cast<s32>(Param(2)),
+                    static_cast<s32>(Param(3)))
                   .raw);
 }

@@ -226,7 +229,7 @@ void SvcWrap() {

 template <void func(s64)>
 void SvcWrap() {
-    func((s64)Param(0));
+    func(static_cast<s64>(Param(0)));
 }

 template <void func(u64, u64 len)>
@@ -239,4 +242,9 @@ void SvcWrap() {
    func(Param(0), Param(1), Param(2));
 }

+template <void func(u32, u64, u64)>
+void SvcWrap() {
+    func(static_cast<u32>(Param(0)), Param(1), Param(2));
+}
+
 } // namespace Kernel
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -183,13 +183,10 @@ void Thread::ResumeFromWait() {
 */
 static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAddr stack_top,
                               VAddr entry_point, u64 arg) {
-    memset(&context, 0, sizeof(Core::ARM_Interface::ThreadContext));
-
+    context = {};
    context.cpu_registers[0] = arg;
    context.pc = entry_point;
    context.sp = stack_top;
-    context.pstate = 0;
-    context.fpcr = 0;
 }

 ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name, VAddr entry_point,
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -2,8 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <chrono>
 #include <cstring>
 #include <memory>
+#include <optional>
 #include <vector>

 #include <opus.h>
@@ -33,7 +35,8 @@ public:
            {1, nullptr, "SetContext"},
            {2, nullptr, "DecodeInterleavedForMultiStream"},
            {3, nullptr, "SetContextForMultiStream"},
-            {4, nullptr, "Unknown4"},
+            {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerformance,
+             "DecodeInterleavedWithPerformance"},
            {5, nullptr, "Unknown5"},
            {6, nullptr, "Unknown6"},
            {7, nullptr, "Unknown7"},
@@ -59,8 +62,31 @@ private:
        ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
    }

-    bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input,
-                                   std::vector<opus_int16>& output) {
+    void DecodeInterleavedWithPerformance(Kernel::HLERequestContext& ctx) {
+        u32 consumed = 0;
+        u32 sample_count = 0;
+        u64 performance = 0; // Filled in by the decoder with the measured decode time
+        std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16));
+        if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples,
+                                       performance)) {
+            IPC::ResponseBuilder rb{ctx, 2}; // Failure response carries only a result code
+            // TODO(ogniK): Use correct error code
+            rb.Push(ResultCode(-1));
+            return;
+        }
+        IPC::ResponseBuilder rb{ctx, 6}; // Result code plus consumed, performance, sample_count
+        rb.Push(RESULT_SUCCESS);
+        rb.Push<u32>(consumed);
+        rb.Push<u64>(performance);
+        rb.Push<u32>(sample_count);
+        ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); // Decoded samples
+    }
+
+    bool Decoder_DecodeInterleaved(
+        u32& consumed, u32& sample_count, const std::vector<u8>& input,
+        std::vector<opus_int16>& output,
+        std::optional<std::reference_wrapper<u64>> performance_time = std::nullopt) {
+        const auto start_time = std::chrono::high_resolution_clock::now();
        std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
        if (sizeof(OpusHeader) > input.size())
            return false;
@@ -80,8 +106,13 @@ private:
                        (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)), 0);
        if (out_sample_count < 0)
            return false;
+        const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
        sample_count = out_sample_count;
        consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz);
+        if (performance_time.has_value()) {
+            performance_time->get() =
+                std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
+        }
        return true;
    }

--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -15,6 +15,11 @@
 #include "video_core/renderer_base.h"

 namespace Service::Nvidia::Devices {
+namespace NvErrCodes {
+enum {
+    InvalidNmapHandle = -22,
+};
+}

 nvhost_as_gpu::nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {}
 nvhost_as_gpu::~nvhost_as_gpu() = default;
@@ -79,14 +84,16 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
    std::memcpy(entries.data(), input.data(), input.size());

    auto& gpu = Core::System::GetInstance().GPU();
-
    for (const auto& entry : entries) {
        LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
                    entry.offset, entry.nvmap_handle, entry.pages);
        Tegra::GPUVAddr offset = static_cast<Tegra::GPUVAddr>(entry.offset) << 0x10;
-
        auto object = nvmap_dev->GetObject(entry.nvmap_handle);
-        ASSERT(object);
+        if (!object) {
+            LOG_CRITICAL(Service_NVDRV, "nvmap {} is an invalid handle!", entry.nvmap_handle);
+            std::memcpy(output.data(), entries.data(), output.size());
+            return static_cast<u32>(NvErrCodes::InvalidNmapHandle);
+        }

        ASSERT(object->status == nvmap::Object::Status::Allocated);

@@ -167,10 +174,11 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
    auto& system_instance = Core::System::GetInstance();

    // Remove this memory region from the rasterizer cache.
-    system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(params.offset,
-                                                                     itr->second.size);
-
    auto& gpu = system_instance.GPU();
+    auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
+    ASSERT(cpu_addr);
+    system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
+
    params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);

    buffer_mappings.erase(itr->second.offset);
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -11,6 +11,13 @@

 namespace Service::Nvidia::Devices {

+namespace NvErrCodes {
+enum {
+    OperationNotPermitted = -1,
+    InvalidValue = -22,
+};
+}
+
 nvmap::nvmap() = default;
 nvmap::~nvmap() = default;

@@ -44,7 +51,11 @@ u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& o
 u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) {
    IocCreateParams params;
    std::memcpy(&params, input.data(), sizeof(params));
+    LOG_DEBUG(Service_NVDRV, "size=0x{:08X}", params.size);

+    if (!params.size) {
+        return static_cast<u32>(NvErrCodes::InvalidValue);
+    }
    // Create a new nvmap object and obtain a handle to it.
    auto object = std::make_shared<Object>();
    object->id = next_id++;
@@ -55,8 +66,6 @@ u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) {
    u32 handle = next_handle++;
    handles[handle] = std::move(object);

-    LOG_DEBUG(Service_NVDRV, "size=0x{:08X}", params.size);
-
    params.handle = handle;

    std::memcpy(output.data(), &params, sizeof(params));
@@ -66,9 +75,29 @@ u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) {
 u32 nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) {
    IocAllocParams params;
    std::memcpy(&params, input.data(), sizeof(params));
+    LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.addr);
+
+    if (!params.handle) {
+        return static_cast<u32>(NvErrCodes::InvalidValue);
+    }
+
+    if ((params.align - 1) & params.align) {
+        return static_cast<u32>(NvErrCodes::InvalidValue);
+    }
+
+    const u32 min_alignment = 0x1000;
+    if (params.align < min_alignment) {
+        params.align = min_alignment;
+    }

    auto object = GetObject(params.handle);
-    ASSERT(object);
+    if (!object) {
+        return static_cast<u32>(NvErrCodes::InvalidValue);
+    }
+
+    if (object->status == Object::Status::Allocated) {
+        return static_cast<u32>(NvErrCodes::OperationNotPermitted);
+    }

    object->flags = params.flags;
    object->align = params.align;
@@ -76,8 +105,6 @@ u32 nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) {
    object->addr = params.addr;
    object->status = Object::Status::Allocated;

-    LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.addr);
-
    std::memcpy(output.data(), &params, sizeof(params));
    return 0;
 }
@@ -88,8 +115,14 @@ u32 nvmap::IocGetId(const std::vector<u8>& input, std::vector<u8>& output) {

    LOG_WARNING(Service_NVDRV, "called");

+    if (!params.handle) {
+        return static_cast<u32>(NvErrCodes::InvalidValue);
+    }
+
    auto object = GetObject(params.handle);
-    ASSERT(object);
+    if (!object) {
+        return static_cast<u32>(NvErrCodes::OperationNotPermitted);
+    }

    params.id = object->id;

@@ -105,7 +138,14 @@ u32 nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output) {

    auto itr = std::find_if(handles.begin(), handles.end(),
                            [&](const auto& entry) { return entry.second->id == params.id; });
-    ASSERT(itr != handles.end());
+    if (itr == handles.end()) {
+        return static_cast<u32>(NvErrCodes::InvalidValue);
+    }
+
+    auto& object = itr->second;
+    if (object->status != Object::Status::Allocated) {
+        return static_cast<u32>(NvErrCodes::InvalidValue);
+    }

    itr->second->refcount++;

@@ -125,8 +165,13 @@ u32 nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output) {
    LOG_WARNING(Service_NVDRV, "(STUBBED) called type={}", params.param);

    auto object = GetObject(params.handle);
-    ASSERT(object);
-    ASSERT(object->status == Object::Status::Allocated);
+    if (!object) {
+        return static_cast<u32>(NvErrCodes::InvalidValue);
+    }
+
+    if (object->status != Object::Status::Allocated) {
+        return static_cast<u32>(NvErrCodes::OperationNotPermitted);
+    }

    switch (static_cast<ParamTypes>(params.param)) {
    case ParamTypes::Size:
@@ -163,9 +208,12 @@ u32 nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
    LOG_WARNING(Service_NVDRV, "(STUBBED) called");

    auto itr = handles.find(params.handle);
-    ASSERT(itr != handles.end());
-
-    ASSERT(itr->second->refcount > 0);
+    if (itr == handles.end()) {
+        return static_cast<u32>(NvErrCodes::InvalidValue);
+    }
+    if (!itr->second->refcount) {
+        return static_cast<u32>(NvErrCodes::InvalidValue);
+    }

    itr->second->refcount--;

--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -36,9 +36,9 @@ public:
            RenderTargetFormat format;
            BitField<0, 1, u32> linear;
            union {
-                BitField<0, 4, u32> block_depth;
+                BitField<0, 4, u32> block_width;
                BitField<4, 4, u32> block_height;
-                BitField<8, 4, u32> block_width;
+                BitField<8, 4, u32> block_depth;
            };
            u32 depth;
            u32 layer;
@@ -53,10 +53,20 @@ public:
                                             address_low);
            }

+            u32 BlockWidth() const {
+                // The block width is stored in log2 format.
+                return 1 << block_width;
+            }
+
            u32 BlockHeight() const {
                // The block height is stored in log2 format.
                return 1 << block_height;
            }
+
+            u32 BlockDepth() const {
+                // The block depth is stored in log2 format.
+                return 1 << block_depth;
+            }
        };
        static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");

--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -347,6 +347,16 @@ public:
            DecrWrap = 8,
        };

+        enum class MemoryLayout : u32 {
+            Linear = 0,
+            BlockLinear = 1,
+        };
+
+        enum class InvMemoryLayout : u32 {
+            BlockLinear = 0,
+            Linear = 1,
+        };
+
        struct Cull {
            enum class FrontFace : u32 {
                ClockWise = 0x0900,
@@ -432,7 +442,12 @@ public:
            u32 width;
            u32 height;
            Tegra::RenderTargetFormat format;
-            u32 block_dimensions;
+            union {
+                BitField<0, 3, u32> block_width;
+                BitField<4, 3, u32> block_height;
+                BitField<8, 3, u32> block_depth;
+                BitField<12, 1, InvMemoryLayout> type;
+            } memory_layout;
            u32 array_mode;
            u32 layer_stride;
            u32 base_layer;
@@ -532,7 +547,21 @@ public:
                INSERT_PADDING_WORDS(0x3);
                s32 clear_stencil;

-                INSERT_PADDING_WORDS(0x6C);
+                INSERT_PADDING_WORDS(0x17);
+
+                struct {
+                    u32 enable;
+                    union {
+                        BitField<0, 16, u32> min_x;
+                        BitField<16, 16, u32> max_x;
+                    };
+                    union {
+                        BitField<0, 16, u32> min_y;
+                        BitField<16, 16, u32> max_y;
+                    };
+                } scissor_test;
+
+                INSERT_PADDING_WORDS(0x52);

                s32 stencil_back_func_ref;
                u32 stencil_back_mask;
@@ -548,7 +577,12 @@ public:
                    u32 address_high;
                    u32 address_low;
                    Tegra::DepthFormat format;
-                    u32 block_dimensions;
+                    union {
+                        BitField<0, 4, u32> block_width;
+                        BitField<4, 4, u32> block_height;
+                        BitField<8, 4, u32> block_depth;
+                        BitField<20, 1, InvMemoryLayout> type;
+                    } memory_layout;
                    u32 layer_stride;

                    GPUVAddr Address() const {
@@ -1002,6 +1036,7 @@ ASSERT_REG_POSITION(vertex_buffer, 0x35D);
 ASSERT_REG_POSITION(clear_color[0], 0x360);
 ASSERT_REG_POSITION(clear_depth, 0x364);
 ASSERT_REG_POSITION(clear_stencil, 0x368);
+ASSERT_REG_POSITION(scissor_test, 0x380);
 ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
 ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
 ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -214,6 +214,18 @@ enum class IMinMaxExchange : u64 {
    XHi = 3,
 };

+enum class VmadType : u64 {
+    Size16_Low = 0,
+    Size16_High = 1,
+    Size32 = 2,
+    Invalid = 3,
+};
+
+enum class VmadShr : u64 {
+    Shr7 = 1,
+    Shr15 = 2,
+};
+
 enum class XmadMode : u64 {
    None = 0,
    CLo = 1,
@@ -314,6 +326,15 @@ enum class TextureMiscMode : u64 {
    PTP,
 };

+enum class IsberdMode : u64 {
+    None = 0,
+    Patch = 1,
+    Prim = 2,
+    Attr = 3,
+};
+
+enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };
+
 enum class IpaInterpMode : u64 {
    Linear = 0,
    Perspective = 1,
@@ -340,6 +361,87 @@ struct IpaMode {
    }
 };

+enum class SystemVariable : u64 {
+    LaneId = 0x00,
+    VirtCfg = 0x02,
+    VirtId = 0x03,
+    Pm0 = 0x04,
+    Pm1 = 0x05,
+    Pm2 = 0x06,
+    Pm3 = 0x07,
+    Pm4 = 0x08,
+    Pm5 = 0x09,
+    Pm6 = 0x0a,
+    Pm7 = 0x0b,
+    OrderingTicket = 0x0f,
+    PrimType = 0x10,
+    InvocationId = 0x11,
+    Ydirection = 0x12,
+    ThreadKill = 0x13,
+    ShaderType = 0x14,
+    DirectBeWriteAddressLow = 0x15,
+    DirectBeWriteAddressHigh = 0x16,
+    DirectBeWriteEnabled = 0x17,
+    MachineId0 = 0x18,
+    MachineId1 = 0x19,
+    MachineId2 = 0x1a,
+    MachineId3 = 0x1b,
+    Affinity = 0x1c,
+    InvocationInfo = 0x1d,
+    WscaleFactorXY = 0x1e,
+    WscaleFactorZ = 0x1f,
+    Tid = 0x20,
+    TidX = 0x21,
+    TidY = 0x22,
+    TidZ = 0x23,
+    CtaParam = 0x24,
+    CtaIdX = 0x25,
+    CtaIdY = 0x26,
+    CtaIdZ = 0x27,
+    NtId = 0x28,
+    CirQueueIncrMinusOne = 0x29,
+    Nlatc = 0x2a,
+    SmSpaVersion = 0x2c,
+    MultiPassShaderInfo = 0x2d,
+    LwinHi = 0x2e,
+    SwinHi = 0x2f,
+    SwinLo = 0x30,
+    SwinSz = 0x31,
+    SmemSz = 0x32,
+    SmemBanks = 0x33,
+    LwinLo = 0x34,
+    LwinSz = 0x35,
+    LmemLosz = 0x36,
+    LmemHioff = 0x37,
+    EqMask = 0x38,
+    LtMask = 0x39,
+    LeMask = 0x3a,
+    GtMask = 0x3b,
+    GeMask = 0x3c,
+    RegAlloc = 0x3d,
+    CtxAddr = 0x3e,      // .fmask = F_SM50
+    BarrierAlloc = 0x3e, // .fmask = F_SM60
+    GlobalErrorStatus = 0x40,
+    WarpErrorStatus = 0x42,
+    WarpErrorStatusClear = 0x43,
+    PmHi0 = 0x48,
+    PmHi1 = 0x49,
+    PmHi2 = 0x4a,
+    PmHi3 = 0x4b,
+    PmHi4 = 0x4c,
+    PmHi5 = 0x4d,
+    PmHi6 = 0x4e,
+    PmHi7 = 0x4f,
+    ClockLo = 0x50,
+    ClockHi = 0x51,
+    GlobalTimerLo = 0x52,
+    GlobalTimerHi = 0x53,
+    HwTaskId = 0x60,
+    CircularQueueEntryIndex = 0x61,
+    CircularQueueEntryAddressLow = 0x62,
+    CircularQueueEntryAddressHigh = 0x63,
+};
+
 union Instruction {
    Instruction& operator=(const Instruction& instr) {
        value = instr.value;
@@ -362,6 +464,7 @@ union Instruction {
    BitField<48, 16, u64> opcode;

    union {
+        BitField<20, 16, u64> imm20_16;
        BitField<20, 19, u64> imm20_19;
        BitField<20, 32, s64> imm20_32;
        BitField<45, 1, u64> negate_b;
@@ -403,6 +506,10 @@ union Instruction {
            }
        } lop3;

+        u16 GetImm20_16() const {
+            return static_cast<u16>(imm20_16);
+        }
+
        u32 GetImm20_19() const {
            u32 imm{static_cast<u32>(imm20_19)};
            imm <<= 12;
@@ -914,6 +1021,35 @@ union Instruction {
        }
    } bra;

+    union {
+        BitField<39, 1, u64> emit; // EmitVertex
+        BitField<40, 1, u64> cut;  // EndPrimitive
+    } out;
+
+    union {
+        BitField<31, 1, u64> skew;
+        BitField<32, 1, u64> o;
+        BitField<33, 2, IsberdMode> mode;
+        BitField<47, 2, IsberdShift> shift;
+    } isberd;
+
+    union {
+        BitField<48, 1, u64> signed_a;
+        BitField<38, 1, u64> is_byte_chunk_a;
+        BitField<36, 2, VmadType> type_a;
+        BitField<36, 2, u64> byte_height_a;
+
+        BitField<49, 1, u64> signed_b;
+        BitField<50, 1, u64> use_register_b;
+        BitField<30, 1, u64> is_byte_chunk_b;
+        BitField<28, 2, VmadType> type_b;
+        BitField<28, 2, u64> byte_height_b;
+
+        BitField<51, 2, VmadShr> shr;
+        BitField<55, 1, u64> saturate; // Saturates the result (a * b + c)
+        BitField<47, 1, u64> cc;
+    } vmad;
+
    union {
        BitField<20, 16, u64> imm20_16;
        BitField<36, 1, u64> product_shift_left;
@@ -936,6 +1072,10 @@ union Instruction {
        BitField<36, 5, u64> index;
    } cbuf36;

+    // Unsure about the size of this one.
+    // It's always used with a gpr0, so any size should be fine.
+    BitField<20, 8, SystemVariable> sys20;
+
    BitField<47, 1, u64> generates_cc;
    BitField<61, 1, u64> is_b_imm;
    BitField<60, 1, u64> is_b_gpr;
@@ -975,6 +1115,9 @@ public:
        TMML,   // Texture Mip Map Level
        EXIT,
        IPA,
+        OUT_R, // Emit vertex/primitive
+        ISBERD,
+        VMAD,
        FFMA_IMM, // Fused Multiply and Add
        FFMA_CR,
        FFMA_RC,
@@ -1034,6 +1177,7 @@ public:
        MOV_C,
        MOV_R,
        MOV_IMM,
+        MOV_SYS,
        MOV32_IMM,
        SHL_C,
        SHL_R,
@@ -1209,6 +1353,9 @@ private:
            INST("1101111101011---", Id::TMML, Type::Memory, "TMML"),
            INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
            INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
+            INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
+            INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
+            INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"),
            INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
            INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
            INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
@@ -1255,6 +1402,7 @@ private:
            INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
            INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
            INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
+            INST("1111000011001---", Id::MOV_SYS, Type::Trivial, "MOV_SYS"),
            INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
            INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
            INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -255,7 +255,7 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
    return params;
 }

-void RasterizerOpenGL::SetupShaders() {
+void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
    MICROPROFILE_SCOPE(OpenGL_Shader);
    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();

@@ -270,6 +270,11 @@ void RasterizerOpenGL::SetupShaders() {

        // Skip stages that are not enabled
        if (!gpu.regs.IsShaderConfigEnabled(index)) {
+            // Only the geometry stage binds a replacement (the trivial geometry shader) when
+            // it is disabled; every other disabled stage is simply skipped.
+            if (program == Maxwell::ShaderProgram::Geometry) {
+                shader_program_manager->UseTrivialGeometryShader();
+            }
            continue;
        }

@@ -288,11 +293,18 @@ void RasterizerOpenGL::SetupShaders() {
        switch (program) {
        case Maxwell::ShaderProgram::VertexA:
        case Maxwell::ShaderProgram::VertexB: {
-            shader_program_manager->UseProgrammableVertexShader(shader->GetProgramHandle());
+            shader_program_manager->UseProgrammableVertexShader(
+                shader->GetProgramHandle(primitive_mode));
+            break;
+        }
+        case Maxwell::ShaderProgram::Geometry: {
+            shader_program_manager->UseProgrammableGeometryShader(
+                shader->GetProgramHandle(primitive_mode));
            break;
        }
        case Maxwell::ShaderProgram::Fragment: {
-            shader_program_manager->UseProgrammableFragmentShader(shader->GetProgramHandle());
+            shader_program_manager->UseProgrammableFragmentShader(
+                shader->GetProgramHandle(primitive_mode));
            break;
        }
        default:
@@ -302,12 +314,13 @@ void RasterizerOpenGL::SetupShaders() {
        }

        // Configure the const buffers for this shader stage.
-        current_constbuffer_bindpoint = SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage),
-                                                          shader, current_constbuffer_bindpoint);
+        current_constbuffer_bindpoint =
+            SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
+                              current_constbuffer_bindpoint);

        // Configure the textures for this shader stage.
        current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
-                                                  current_texture_bindpoint);
+                                                  primitive_mode, current_texture_bindpoint);

        // When VertexA is enabled, we have dual vertex shaders
        if (program == Maxwell::ShaderProgram::VertexA) {
@@ -317,8 +330,6 @@ void RasterizerOpenGL::SetupShaders() {
    }

    state.Apply();
-
-    shader_program_manager->UseTrivialGeometryShader();
 }

 std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -541,6 +552,7 @@ void RasterizerOpenGL::DrawArrays() {
    SyncLogicOpState();
    SyncCullMode();
    SyncAlphaTest();
+    SyncScissorTest();
    SyncTransformFeedback();
    SyncPointState();

@@ -580,7 +592,7 @@ void RasterizerOpenGL::DrawArrays() {

    SetupVertexArrays();
    DrawParameters params = SetupDraw();
-    SetupShaders();
+    SetupShaders(params.primitive_mode);

    buffer_cache.Unmap();

@@ -719,7 +731,7 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
 }

 u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader,
-                                        u32 current_bindpoint) {
+                                        GLenum primitive_mode, u32 current_bindpoint) {
    MICROPROFILE_SCOPE(OpenGL_UBO);
    const auto& gpu = Core::System::GetInstance().GPU();
    const auto& maxwell3d = gpu.Maxwell3D();
@@ -771,7 +783,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
            buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));

        // Now configure the bindpoint of the buffer inside the shader
-        glUniformBlockBinding(shader->GetProgramHandle(),
+        glUniformBlockBinding(shader->GetProgramHandle(primitive_mode),
                              shader->GetProgramResourceIndex(used_buffer),
                              current_bindpoint + bindpoint);

@@ -787,7 +799,8 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
    return current_bindpoint + static_cast<u32>(entries.size());
 }

-u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, u32 current_unit) {
+u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
+                                    GLenum primitive_mode, u32 current_unit) {
    MICROPROFILE_SCOPE(OpenGL_Texture);
    const auto& gpu = Core::System::GetInstance().GPU();
    const auto& maxwell3d = gpu.Maxwell3D();
@@ -802,8 +815,8 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,

        // Bind the uniform to the sampler.

-        glProgramUniform1i(shader->GetProgramHandle(), shader->GetUniformLocation(entry),
-                           current_bindpoint);
+        glProgramUniform1i(shader->GetProgramHandle(primitive_mode),
+                           shader->GetUniformLocation(entry), current_bindpoint);

        const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());

@@ -972,6 +985,22 @@ void RasterizerOpenGL::SyncAlphaTest() {
    }
 }

+void RasterizerOpenGL::SyncScissorTest() {
+    const auto& scissor = Core::System::GetInstance().GPU().Maxwell3D().regs.scissor_test;
+    state.scissor.enabled = (scissor.enable != 0);
+    if (scissor.enable == 0) {
+        return; // The stored rectangle is left as-is while the test is disabled
+    }
+    // TODO(Blinkhawk): Figure if the hardware supports scissor testing per viewport and how it's
+    // implemented.
+    const u32 extent_x = scissor.max_x - scissor.min_x;
+    const u32 extent_y = scissor.max_y - scissor.min_y;
+    state.scissor.x = scissor.min_x;
+    state.scissor.y = scissor.min_y;
+    state.scissor.width = extent_x;
+    state.scissor.height = extent_y;
+}
+
 void RasterizerOpenGL::SyncTransformFeedback() {
    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;

--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -120,7 +120,7 @@ private:
     * @returns The next available bindpoint for use in the next shader stage.
     */
    u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
-                          u32 current_bindpoint);
+                          GLenum primitive_mode, u32 current_bindpoint);

    /*
     * Configures the current textures to use for the draw command.
@@ -130,7 +130,7 @@ private:
     * @returns The next available bindpoint for use in the next shader stage.
     */
    u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
-                      u32 current_unit);
+                      GLenum primitive_mode, u32 current_unit);

    /// Syncs the viewport to match the guest state
    void SyncViewport();
@@ -165,6 +165,9 @@ private:
    /// Syncs the alpha test state to match the guest state
    void SyncAlphaTest();

+    /// Syncs the scissor test state to match the guest state
+    void SyncScissorTest();
+
    /// Syncs the transform feedback state to match the guest state
    void SyncTransformFeedback();

@@ -207,7 +210,7 @@ private:

    DrawParameters SetupDraw();

-    void SetupShaders();
+    void SetupShaders(GLenum primitive_mode);

    enum class AccelDraw { Disabled, Arrays, Indexed };
    AccelDraw accelerate_draw = AccelDraw::Disabled;
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -45,7 +45,9 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
    SurfaceParams params{};
    params.addr = TryGetCpuAddr(config.tic.Address());
    params.is_tiled = config.tic.IsTiled();
+    params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0;
    params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
+    params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0;
    params.pixel_format =
        PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value());
    params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
@@ -97,8 +99,11 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
    const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
    SurfaceParams params{};
    params.addr = TryGetCpuAddr(config.Address());
-    params.is_tiled = true;
-    params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
+    params.is_tiled =
+        config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
+    params.block_width = 1 << config.memory_layout.block_width;
+    params.block_height = 1 << config.memory_layout.block_height;
+    params.block_depth = 1 << config.memory_layout.block_depth;
    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
@@ -120,13 +125,16 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
    return params;
 }

-/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
-                                                             Tegra::GPUVAddr zeta_address,
-                                                             Tegra::DepthFormat format) {
+/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
+    u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
+    u32 block_width, u32 block_height, u32 block_depth,
+    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
    SurfaceParams params{};
    params.addr = TryGetCpuAddr(zeta_address);
-    params.is_tiled = true;
-    params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
+    params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
+    params.block_width = 1 << std::min(block_width, 5U);
+    params.block_height = 1 << std::min(block_height, 5U);
+    params.block_depth = 1 << std::min(block_depth, 5U);
    params.pixel_format = PixelFormatFromDepthFormat(format);
    params.component_type = ComponentTypeFromDepthFormat(format);
    params.type = GetFormatType(params.pixel_format);
@@ -148,7 +156,9 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
    SurfaceParams params{};
    params.addr = TryGetCpuAddr(config.Address());
    params.is_tiled = !config.linear;
-    params.block_height = params.is_tiled ? config.BlockHeight() : 0,
+    params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0;
+    params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0;
+    params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0;
    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
@@ -818,6 +828,11 @@ void CachedSurface::LoadGLBuffer() {
    if (params.is_tiled) {
        gl_buffer.resize(total_size);

+        ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
+                   params.block_width, static_cast<u32>(params.target));
+        ASSERT_MSG(params.block_depth == 1, "Block depth is defined as {} on texture type {}",
+                   params.block_depth, static_cast<u32>(params.target));
+
        // TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do
        // this for 3D textures, etc.
        switch (params.target) {
@@ -989,7 +1004,9 @@ Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
    }

    SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer(
-        regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format)};
+        regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format,
+        regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
+        regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};

    return GetSurface(depth_params, preserve_contents);
 }
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -716,9 +716,10 @@ struct SurfaceParams {
    static SurfaceParams CreateForFramebuffer(std::size_t index);

    /// Creates SurfaceParams for a depth buffer configuration
-    static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
-                                              Tegra::GPUVAddr zeta_address,
-                                              Tegra::DepthFormat format);
+    static SurfaceParams CreateForDepthBuffer(
+        u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format,
+        u32 block_width, u32 block_height, u32 block_depth,
+        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);

    /// Creates SurfaceParams for a Fermi2D surface copy
    static SurfaceParams CreateForFermiCopySurface(
@@ -733,7 +734,9 @@ struct SurfaceParams {

    VAddr addr;
    bool is_tiled;
+    u32 block_width;
    u32 block_height;
+    u32 block_depth;
    PixelFormat pixel_format;
    ComponentType component_type;
    SurfaceType type;
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -68,6 +68,10 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
        program_result = GLShader::GenerateVertexShader(setup);
        gl_type = GL_VERTEX_SHADER;
        break;
+    case Maxwell::ShaderProgram::Geometry:
+        program_result = GLShader::GenerateGeometryShader(setup);
+        gl_type = GL_GEOMETRY_SHADER;
+        break;
    case Maxwell::ShaderProgram::Fragment:
        program_result = GLShader::GenerateFragmentShader(setup);
        gl_type = GL_FRAGMENT_SHADER;
@@ -80,11 +84,16 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)

    entries = program_result.second;

-    OGLShader shader;
-    shader.Create(program_result.first.c_str(), gl_type);
-    program.Create(true, shader.handle);
-    SetShaderUniformBlockBindings(program.handle);
-    VideoCore::LabelGLObject(GL_PROGRAM, program.handle, addr);
+    if (program_type != Maxwell::ShaderProgram::Geometry) {
+        OGLShader shader;
+        shader.Create(program_result.first.c_str(), gl_type);
+        program.Create(true, shader.handle);
+        SetShaderUniformBlockBindings(program.handle);
+        VideoCore::LabelGLObject(GL_PROGRAM, program.handle, addr);
+    } else {
+        // Store shader's code to lazily build it on draw
+        geometry_programs.code = program_result.first;
+    }
 }

 GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
@@ -110,6 +119,21 @@ GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
    return search->second;
 }

+GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program,
+                                         const std::string& glsl_topology,
+                                         const std::string& debug_name) {
+    if (target_program.handle != 0) { // Already built for this topology; reuse it
+        return target_program.handle;
+    }
+    const std::string source{geometry_programs.code + "layout (" + glsl_topology + ") in;\n"};
+    OGLShader shader; // Compiles the stored code with the input layout line appended
+    shader.Create(source.c_str(), GL_GEOMETRY_SHADER);
+    target_program.Create(true, shader.handle);
+    SetShaderUniformBlockBindings(target_program.handle);
+    VideoCore::LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
+    return target_program.handle;
+}
+
 Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
    const VAddr program_addr{GetShaderAddress(program)};

--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -7,6 +7,7 @@
 #include <map>
 #include <memory>

+#include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -38,8 +39,31 @@ public:
    }

    /// Gets the GL program handle for the shader
-    GLuint GetProgramHandle() const {
-        return program.handle;
+    GLuint GetProgramHandle(GLenum primitive_mode) {
+        if (program_type != Maxwell::ShaderProgram::Geometry) {
+            return program.handle;
+        }
+        switch (primitive_mode) { // One lazily built program per GLSL input topology
+        case GL_POINTS:
+            return LazyGeometryProgram(geometry_programs.points, "points", "ShaderPoints");
+        case GL_LINES:
+        case GL_LINE_STRIP:
+            return LazyGeometryProgram(geometry_programs.lines, "lines", "ShaderLines");
+        case GL_LINES_ADJACENCY:
+        case GL_LINE_STRIP_ADJACENCY:
+            return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency",
+                                       "ShaderLinesAdjacency");
+        case GL_TRIANGLES:
+        case GL_TRIANGLE_STRIP:
+        case GL_TRIANGLE_FAN:
+            return LazyGeometryProgram(geometry_programs.triangles, "triangles", "ShaderTriangles");
+        case GL_TRIANGLES_ADJACENCY:
+        case GL_TRIANGLE_STRIP_ADJACENCY:
+            return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency",
+                                       "ShaderTrianglesAdjacency");
+        default:
+            UNREACHABLE_MSG("Unknown primitive mode."); // NOTE(review): no return on this path
+        }
+    }

    /// Gets the GL program resource location for the specified resource, caching as needed
@@ -49,12 +73,30 @@ public:
    GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);

 private:
+    /// Generates a geometry shader or returns one that already exists.
+    GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology,
+                               const std::string& debug_name);
+
    VAddr addr;
    Maxwell::ShaderProgram program_type;
    GLShader::ShaderSetup setup;
    GLShader::ShaderEntries entries;
+
+    // Non-geometry program.
    OGLProgram program;

+    // Geometry programs. These are needed because GLSL needs an input topology but it's not
+    // declared by the hardware. Work around this issue by generating a different shader per input
+    // topology class.
+    struct {
+        std::string code; // Generated source; the input layout line is appended at build time
+        OGLProgram points; // Used for GL_POINTS
+        OGLProgram lines; // Used for GL_LINES and GL_LINE_STRIP
+        OGLProgram lines_adjacency; // Used for GL_LINES_ADJACENCY and GL_LINE_STRIP_ADJACENCY
+        OGLProgram triangles; // Used for GL_TRIANGLES, GL_TRIANGLE_STRIP and GL_TRIANGLE_FAN
+        OGLProgram triangles_adjacency; // Used for the two GL_TRIANGLE*_ADJACENCY modes
+    } geometry_programs;
+
    std::map<u32, GLuint> resource_cache;
    std::map<u32, GLint> uniform_cache;
 };
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -7,6 +7,7 @@
 #include <string>
 #include <string_view>

+#include <boost/optional.hpp>
 #include <fmt/format.h>

 #include "common/assert.h"
@@ -29,11 +30,32 @@ using Tegra::Shader::SubOp;
 constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
 constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);

+enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
+
+constexpr u32 MAX_GEOMETRY_BUFFERS = 6;
+constexpr u32 MAX_ATTRIBUTES = 0x100; // Size in vec4s, this value is untested
+
 class DecompileFail : public std::runtime_error {
 public:
    using std::runtime_error::runtime_error;
 };

+/// Maps an output topology to "points", "line_strip" or "triangle_strip" (logs unknown values)
+static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
+    switch (topology) {
+    case Tegra::Shader::OutputTopology::PointList:
+        return "points";
+    case Tegra::Shader::OutputTopology::LineStrip:
+        return "line_strip";
+    case Tegra::Shader::OutputTopology::TriangleStrip:
+        return "triangle_strip";
+    default:
+        LOG_CRITICAL(Render_OpenGL, "Unknown output topology {}", static_cast<u32>(topology));
+        UNREACHABLE();
+        return "points";
+    }
+}
+
 /// Describes the behaviour of code path of a given entry point and a return point.
 enum class ExitMethod {
    Undetermined, ///< Internal value. Only occur when analyzing JMP loop.
@@ -253,8 +275,9 @@ enum class InternalFlag : u64 {
 class GLSLRegisterManager {
 public:
    GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations,
-                        const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix)
-        : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix} {
+                        const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix,
+                        const Tegra::Shader::Header& header)
+        : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header} {
        BuildRegisterList();
        BuildInputList();
    }
@@ -358,11 +381,13 @@ public:
     * @param reg The destination register to use.
     * @param elem The element to use for the operation.
     * @param attribute The input attribute to use as the source value.
+     * @param vertex The register that decides which vertex to read from (used in GS).
     */
    void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute,
-                                    const Tegra::Shader::IpaMode& input_mode) {
+                                    const Tegra::Shader::IpaMode& input_mode,
+                                    boost::optional<Register> vertex = {}) {
        const std::string dest = GetRegisterAsFloat(reg);
-        const std::string src = GetInputAttribute(attribute, input_mode) + GetSwizzle(elem);
+        const std::string src = GetInputAttribute(attribute, input_mode, vertex) + GetSwizzle(elem);
        shader.AddLine(dest + " = " + src + ';');
    }

@@ -391,16 +416,29 @@ public:
     * are stored as floats, so this may require conversion.
     * @param attribute The destination output attribute.
     * @param elem The element to use for the operation.
-     * @param reg The register to use as the source value.
+     * @param val_reg The register to use as the source value.
+     * @param buf_reg The register that tells which buffer to write to (used in geometry shaders).
     */
-    void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) {
+    void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& val_reg,
+                                      const Register& buf_reg) {
        const std::string dest = GetOutputAttribute(attribute);
-        const std::string src = GetRegisterAsFloat(reg);
+        const std::string src = GetRegisterAsFloat(val_reg);

        if (!dest.empty()) {
            // Can happen with unknown/unimplemented output attributes, in which case we ignore the
            // instruction for now.
-            shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
+            if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
+                // TODO(Rodrigo): nouveau sets some attributes after emitting a geometry shader.
+                // These instructions use a dirty register as buffer index. To keep some drivers
+                // from complaining about the out-of-bounds writes, guard them.
+                const std::string buf_index{"min(" + GetRegisterAsInteger(buf_reg) + ", " +
+                                            std::to_string(MAX_GEOMETRY_BUFFERS - 1) + ')'};
+                shader.AddLine("amem[" + buf_index + "][" +
+                               std::to_string(static_cast<u32>(attribute)) + ']' +
+                               GetSwizzle(elem) + " = " + src + ';');
+            } else {
+                shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
+            }
        }
    }

@@ -441,58 +479,18 @@ public:
        }
    }

-    /// Add declarations for registers
+    /// Add declarations.
    void GenerateDeclarations(const std::string& suffix) {
-        for (const auto& reg : regs) {
-            declarations.AddLine(GLSLRegister::GetTypeString() + ' ' + reg.GetPrefixString() +
-                                 std::to_string(reg.GetIndex()) + '_' + suffix + " = 0;");
-        }
-        declarations.AddNewLine();
-
-        for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) {
-            const InternalFlag code = static_cast<InternalFlag>(ii);
-            declarations.AddLine("bool " + GetInternalFlag(code) + " = false;");
-        }
-        declarations.AddNewLine();
-
-        for (const auto element : declr_input_attribute) {
-            // TODO(bunnei): Use proper number of elements for these
-            u32 idx =
-                static_cast<u32>(element.first) - static_cast<u32>(Attribute::Index::Attribute_0);
-            declarations.AddLine("layout(location = " + std::to_string(idx) + ")" +
-                                 GetInputFlags(element.first) + "in vec4 " +
-                                 GetInputAttribute(element.first, element.second) + ';');
-        }
-        declarations.AddNewLine();
-
-        for (const auto& index : declr_output_attribute) {
-            // TODO(bunnei): Use proper number of elements for these
-            declarations.AddLine("layout(location = " +
-                                 std::to_string(static_cast<u32>(index) -
-                                                static_cast<u32>(Attribute::Index::Attribute_0)) +
-                                 ") out vec4 " + GetOutputAttribute(index) + ';');
-        }
-        declarations.AddNewLine();
-
-        for (const auto& entry : GetConstBuffersDeclarations()) {
-            declarations.AddLine("layout(std140) uniform " + entry.GetName());
-            declarations.AddLine('{');
-            declarations.AddLine("    vec4 c" + std::to_string(entry.GetIndex()) +
-                                 "[MAX_CONSTBUFFER_ELEMENTS];");
-            declarations.AddLine("};");
-            declarations.AddNewLine();
-        }
-        declarations.AddNewLine();
-
-        const auto& samplers = GetSamplers();
-        for (const auto& sampler : samplers) {
-            declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() +
-                                 ';');
-        }
-        declarations.AddNewLine();
+        GenerateRegisters(suffix);
+        GenerateInternalFlags();
+        GenerateInputAttrs();
+        GenerateOutputAttrs();
+        GenerateConstBuffers();
+        GenerateSamplers();
+        GenerateGeometry();
    }

-    /// Returns a list of constant buffer declarations
+    /// Returns a list of constant buffer declarations.
    std::vector<ConstBufferEntry> GetConstBuffersDeclarations() const {
        std::vector<ConstBufferEntry> result;
        std::copy_if(declr_const_buffers.begin(), declr_const_buffers.end(),
@@ -500,7 +498,7 @@ public:
        return result;
    }

-    /// Returns a list of samplers used in the shader
+    /// Returns a list of samplers used in the shader.
    const std::vector<SamplerEntry>& GetSamplers() const {
        return used_samplers;
    }
@@ -509,7 +507,7 @@ public:
    /// necessary.
    std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
                              bool is_array, bool is_shadow) {
-        const std::size_t offset = static_cast<std::size_t>(sampler.index.Value());
+        const auto offset = static_cast<std::size_t>(sampler.index.Value());

        // If this sampler has already been used, return the existing mapping.
        const auto itr =
@@ -530,6 +528,129 @@ public:
    }

 private:
+    /// Generates declarations for registers.
+    void GenerateRegisters(const std::string& suffix) {
+        for (const auto& reg : regs) {
+            declarations.AddLine(GLSLRegister::GetTypeString() + ' ' + reg.GetPrefixString() +
+                                 std::to_string(reg.GetIndex()) + '_' + suffix + " = 0;");
+        }
+        declarations.AddNewLine();
+    }
+
+    /// Generates declarations for internal flags.
+    void GenerateInternalFlags() {
+        for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) {
+            const InternalFlag code = static_cast<InternalFlag>(ii);
+            declarations.AddLine("bool " + GetInternalFlag(code) + " = false;");
+        }
+        declarations.AddNewLine();
+    }
+
+    /// Generates declarations for input attributes.
+    void GenerateInputAttrs() {
+        if (stage != Maxwell3D::Regs::ShaderStage::Vertex) {
+            const std::string attr =
+                stage == Maxwell3D::Regs::ShaderStage::Geometry ? "gs_position[]" : "position";
+            declarations.AddLine("layout (location = " + std::to_string(POSITION_VARYING_LOCATION) +
+                                 ") in vec4 " + attr + ';');
+        }
+
+        for (const auto element : declr_input_attribute) {
+            // TODO(bunnei): Use proper number of elements for these
+            u32 idx =
+                static_cast<u32>(element.first) - static_cast<u32>(Attribute::Index::Attribute_0);
+            if (stage != Maxwell3D::Regs::ShaderStage::Vertex) {
+                // If inputs are varyings, add an offset
+                idx += GENERIC_VARYING_START_LOCATION;
+            }
+
+            std::string attr{GetInputAttribute(element.first, element.second)};
+            if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
+                attr = "gs_" + attr + "[]";
+            }
+            declarations.AddLine("layout (location = " + std::to_string(idx) + ") " +
+                                 GetInputFlags(element.first) + "in vec4 " + attr + ';');
+        }
+
+        declarations.AddNewLine();
+    }
+
+    /// Generates declarations for output attributes.
+    void GenerateOutputAttrs() {
+        if (stage != Maxwell3D::Regs::ShaderStage::Fragment) {
+            declarations.AddLine("layout (location = " + std::to_string(POSITION_VARYING_LOCATION) +
+                                 ") out vec4 position;");
+        }
+        for (const auto& index : declr_output_attribute) {
+            // TODO(bunnei): Use proper number of elements for these
+            const u32 idx = static_cast<u32>(index) -
+                            static_cast<u32>(Attribute::Index::Attribute_0) +
+                            GENERIC_VARYING_START_LOCATION;
+            declarations.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " +
+                                 GetOutputAttribute(index) + ';');
+        }
+        declarations.AddNewLine();
+    }
+
+    /// Generates declarations for constant buffers.
+    void GenerateConstBuffers() {
+        for (const auto& entry : GetConstBuffersDeclarations()) {
+            declarations.AddLine("layout (std140) uniform " + entry.GetName());
+            declarations.AddLine('{');
+            declarations.AddLine("    vec4 c" + std::to_string(entry.GetIndex()) +
+                                 "[MAX_CONSTBUFFER_ELEMENTS];");
+            declarations.AddLine("};");
+            declarations.AddNewLine();
+        }
+        declarations.AddNewLine();
+    }
+
+    /// Generates declarations for samplers.
+    void GenerateSamplers() {
+        const auto& samplers = GetSamplers();
+        for (const auto& sampler : samplers) {
+            declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() +
+                                 ';');
+        }
+        declarations.AddNewLine();
+    }
+
+    /// Generates declarations used for geometry shaders.
+    void GenerateGeometry() {
+        if (stage != Maxwell3D::Regs::ShaderStage::Geometry)
+            return;
+
+        declarations.AddLine(
+            "layout (" + GetTopologyName(header.common3.output_topology) +
+            ", max_vertices = " + std::to_string(header.common4.max_output_vertices) + ") out;");
+        declarations.AddNewLine();
+
+        declarations.AddLine("vec4 amem[" + std::to_string(MAX_GEOMETRY_BUFFERS) + "][" +
+                             std::to_string(MAX_ATTRIBUTES) + "];");
+        declarations.AddNewLine();
+
+        constexpr char buffer[] = "amem[output_buffer]";
+        declarations.AddLine("void emit_vertex(uint output_buffer) {");
+        ++declarations.scope;
+        for (const auto element : declr_output_attribute) {
+            declarations.AddLine(GetOutputAttribute(element) + " = " + buffer + '[' +
+                                 std::to_string(static_cast<u32>(element)) + "];");
+        }
+
+        declarations.AddLine("position = " + std::string(buffer) + '[' +
+                             std::to_string(static_cast<u32>(Attribute::Index::Position)) + "];");
+
+        // If a geometry shader is attached, it will always flip (it's the last stage before
+        // fragment). For more info about flipping, refer to gl_shader_gen.cpp.
+        declarations.AddLine("position.xy *= viewport_flip.xy;");
+        declarations.AddLine("gl_Position = position;");
+        declarations.AddLine("position.w = 1.0;");
+        declarations.AddLine("EmitVertex();");
+        --declarations.scope;
+        declarations.AddLine('}');
+        declarations.AddNewLine();
+    }
+
    /// Generates code representing a temporary (GPR) register.
    std::string GetRegister(const Register& reg, unsigned elem) {
        if (reg == Register::ZeroIndex) {
@@ -586,11 +707,19 @@ private:

    /// Generates code representing an input attribute register.
    std::string GetInputAttribute(Attribute::Index attribute,
-                                  const Tegra::Shader::IpaMode& input_mode) {
+                                  const Tegra::Shader::IpaMode& input_mode,
+                                  boost::optional<Register> vertex = {}) {
+        auto GeometryPass = [&](const std::string& name) {
+            if (stage == Maxwell3D::Regs::ShaderStage::Geometry && vertex) {
+                return "gs_" + name + '[' + GetRegisterAsInteger(vertex.value(), 0, false) + ']';
+            }
+            return name;
+        };
+
        switch (attribute) {
        case Attribute::Index::Position:
            if (stage != Maxwell3D::Regs::ShaderStage::Fragment) {
-                return "position";
+                return GeometryPass("position");
            } else {
                return "vec4(gl_FragCoord.x, gl_FragCoord.y, gl_FragCoord.z, 1.0)";
            }
@@ -619,7 +748,7 @@ private:
                        UNREACHABLE();
                    }
                }
-                return "input_attribute_" + std::to_string(index);
+                return GeometryPass("input_attribute_" + std::to_string(index));
            }

            LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", static_cast<u32>(attribute));
@@ -672,7 +801,7 @@ private:
        return out;
    }

-    /// Generates code representing an output attribute register.
+    /// Generates code representing the declaration name of an output attribute register.
    std::string GetOutputAttribute(Attribute::Index attribute) {
        switch (attribute) {
        case Attribute::Index::Position:
@@ -708,6 +837,7 @@ private:
    std::vector<SamplerEntry> used_samplers;
    const Maxwell3D::Regs::ShaderStage& stage;
    const std::string& suffix;
+    const Tegra::Shader::Header& header;
 };

 class GLSLGenerator {
@@ -1103,8 +1233,8 @@ private:
            return offset + 1;
        }

-        shader.AddLine("// " + std::to_string(offset) + ": " + opcode->GetName() + " (" +
-                       std::to_string(instr.value) + ')');
+        shader.AddLine(
+            fmt::format("// {}: {} (0x{:016x})", offset, opcode->GetName(), instr.value));

        using Tegra::Shader::Pred;
        ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute,
@@ -1826,7 +1956,7 @@ private:
                const auto LoadNextElement = [&](u32 reg_offset) {
                    regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
                                                    static_cast<Attribute::Index>(next_index),
-                                                    input_mode);
+                                                    input_mode, instr.gpr39.Value());

                    // Load the next attribute element into the following register. If the element
                    // to load goes beyond the vec4 size, load the first element of the next
@@ -1890,8 +2020,8 @@ private:

                const auto StoreNextElement = [&](u32 reg_offset) {
                    regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
-                                                      next_element,
-                                                      instr.gpr0.Value() + reg_offset);
+                                                      next_element, instr.gpr0.Value() + reg_offset,
+                                                      instr.gpr39.Value());

                    // Load the next attribute element into the following register. If the element
                    // to load goes beyond the vec4 size, load the first element of the next
@@ -2734,6 +2864,52 @@ private:

                break;
            }
+            case OpCode::Id::OUT_R: {
+                ASSERT(instr.gpr20.Value() == Register::ZeroIndex);
+                ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry,
+                           "OUT is expected to be used in a geometry shader.");
+
+                if (instr.out.emit) {
+                    // gpr0 is used to store the next address. Hardware returns a pointer but
+                    // we just return the next index with a cyclic cap.
+                    const std::string current{regs.GetRegisterAsInteger(instr.gpr8, 0, false)};
+                    const std::string next = "((" + current + " + 1" + ") % " +
+                                             std::to_string(MAX_GEOMETRY_BUFFERS) + ')';
+                    shader.AddLine("emit_vertex(" + current + ");");
+                    regs.SetRegisterToInteger(instr.gpr0, false, 0, next, 1, 1);
+                }
+                if (instr.out.cut) {
+                    shader.AddLine("EndPrimitive();");
+                }
+
+                break;
+            }
+            case OpCode::Id::MOV_SYS: {
+                switch (instr.sys20) {
+                case Tegra::Shader::SystemVariable::InvocationInfo: {
+                    LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
+                    regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1);
+                    break;
+                }
+                default: {
+                    LOG_CRITICAL(HW_GPU, "Unhandled system move: {}",
+                                 static_cast<u32>(instr.sys20.Value()));
+                    UNREACHABLE();
+                }
+                }
+                break;
+            }
+            case OpCode::Id::ISBERD: {
+                ASSERT(instr.isberd.o == 0);
+                ASSERT(instr.isberd.skew == 0);
+                ASSERT(instr.isberd.shift == Tegra::Shader::IsberdShift::None);
+                ASSERT(instr.isberd.mode == Tegra::Shader::IsberdMode::None);
+                ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry,
+                           "ISBERD is expected to be used in a geometry shader.");
+                LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
+                regs.SetRegisterToFloat(instr.gpr0, 0, regs.GetRegisterAsFloat(instr.gpr8), 1, 1);
+                break;
+            }
            case OpCode::Id::BRA: {
                ASSERT_MSG(instr.bra.constant_buffer == 0,
                           "BRA with constant buffers are not implemented");
@@ -2777,6 +2953,88 @@ private:
                LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
                break;
            }
+            case OpCode::Id::VMAD: {
+                const bool signed_a = instr.vmad.signed_a == 1;
+                const bool signed_b = instr.vmad.signed_b == 1;
+                const bool result_signed = signed_a || signed_b;
+                boost::optional<std::string> forced_result;
+
+                auto Unpack = [&](const std::string& op, bool is_chunk, bool is_signed,
+                                  Tegra::Shader::VmadType type, u64 byte_height) {
+                    const std::string value = [&]() {
+                        if (!is_chunk) {
+                            const auto offset = static_cast<u32>(byte_height * 8);
+                            return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)";
+                        }
+                        const std::string zero = "0";
+
+                        switch (type) {
+                        case Tegra::Shader::VmadType::Size16_Low:
+                            return '(' + op + " & 0xffff)";
+                        case Tegra::Shader::VmadType::Size16_High:
+                            return '(' + op + " >> 16)";
+                        case Tegra::Shader::VmadType::Size32:
+                            // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when
+                            // this type is used (1 * 1 + 0 == 0x5b800000). Until a better
+                            // explanation is found: assert.
+                            UNREACHABLE_MSG("Unimplemented");
+                            return zero;
+                        case Tegra::Shader::VmadType::Invalid:
+                            // Note(Rodrigo): This flag is invalid according to nvdisasm. From my
+                            // testing (even though it's invalid) this makes the whole instruction
+                            // assign zero to target register.
+                            forced_result = boost::make_optional(zero);
+                            return zero;
+                        default:
+                            UNREACHABLE();
+                            return zero;
+                        }
+                    }();
+
+                    if (is_signed) {
+                        return "int(" + value + ')';
+                    }
+                    return value;
+                };
+
+                const std::string op_a = Unpack(regs.GetRegisterAsInteger(instr.gpr8, 0, false),
+                                                instr.vmad.is_byte_chunk_a != 0, signed_a,
+                                                instr.vmad.type_a, instr.vmad.byte_height_a);
+
+                std::string op_b;
+                if (instr.vmad.use_register_b) {
+                    op_b = Unpack(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
+                                  instr.vmad.is_byte_chunk_b != 0, signed_b, instr.vmad.type_b,
+                                  instr.vmad.byte_height_b);
+                } else {
+                    op_b = '(' +
+                           std::to_string(signed_b ? static_cast<s16>(instr.alu.GetImm20_16())
+                                                   : instr.alu.GetImm20_16()) +
+                           ')';
+                }
+
+                const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed);
+
+                std::string result;
+                if (forced_result) {
+                    result = *forced_result;
+                } else {
+                    result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
+
+                    switch (instr.vmad.shr) {
+                    case Tegra::Shader::VmadShr::Shr7:
+                        result = '(' + result + " >> 7)";
+                        break;
+                    case Tegra::Shader::VmadShr::Shr15:
+                        result = '(' + result + " >> 15)";
+                        break;
+                    }
+                }
+                regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
+                                          instr.vmad.saturate == 1, 0, Register::Size::Word,
+                                          instr.vmad.cc);
+                break;
+            }
            default: {
                LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
                UNREACHABLE();
@@ -2907,7 +3165,7 @@ private:

    ShaderWriter shader;
    ShaderWriter declarations;
-    GLSLRegisterManager regs{shader, declarations, stage, suffix};
+    GLSLRegisterManager regs{shader, declarations, stage, suffix, header};

    // Declarations
    std::set<std::string> declr_predicates;
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -17,7 +17,18 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
    std::string out = "#version 430 core\n";
    out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
    out += Decompiler::GetCommonDeclarations();
-    out += "bool exec_vertex();\n";
+
+    out += R"(
+out gl_PerVertex {
+    vec4 gl_Position;
+};
+
+layout(std140) uniform vs_config {
+    vec4 viewport_flip;
+    uvec4 instance_id;
+    uvec4 flip_stage;
+};
+)";

    if (setup.IsDualProgram()) {
        out += "bool exec_vertex_b();\n";
@@ -28,19 +39,18 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
                                     Maxwell3D::Regs::ShaderStage::Vertex, "vertex")
            .get_value_or({});

+    out += program.first;
+
+    if (setup.IsDualProgram()) {
+        ProgramResult program_b =
+            Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET,
+                                         Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b")
+                .get_value_or({});
+        out += program_b.first;
+    }
+
    out += R"(

-out gl_PerVertex {
-    vec4 gl_Position;
-};
-
-out vec4 position;
-
-layout (std140) uniform vs_config {
-    vec4 viewport_flip;
-    uvec4 instance_id;
-};
-
 void main() {
    position = vec4(0.0, 0.0, 0.0, 0.0);
    exec_vertex();
@@ -52,27 +62,52 @@ void main() {

    out += R"(

-    // Viewport can be flipped, which is unsupported by glViewport
-    position.xy *= viewport_flip.xy;
+    // Check if the flip stage is VertexB
+    if (flip_stage[0] == 1) {
+        // Viewport can be flipped, which is unsupported by glViewport
+        position.xy *= viewport_flip.xy;
+    }
    gl_Position = position;

    // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0
    // For now, this is here to bring order in lieu of proper emulation
-    position.w = 1.0;
+    if (flip_stage[0] == 1) {
+        position.w = 1.0;
+    }
 }

 )";

+    return {out, program.second};
+}
+
+ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
+    std::string out = "#version 430 core\n";
+    out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
+    out += Decompiler::GetCommonDeclarations();
+    out += "bool exec_geometry();\n";
+
+    ProgramResult program =
+        Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
+                                     Maxwell3D::Regs::ShaderStage::Geometry, "geometry")
+            .get_value_or({});
+    out += R"(
+out gl_PerVertex {
+    vec4 gl_Position;
+};
+
+layout (std140) uniform gs_config {
+    vec4 viewport_flip;
+    uvec4 instance_id;
+    uvec4 flip_stage;
+};
+
+void main() {
+    exec_geometry();
+}
+
+)";
    out += program.first;
-
-    if (setup.IsDualProgram()) {
-        ProgramResult program_b =
-            Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET,
-                                         Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b")
-                .get_value_or({});
-        out += program_b.first;
-    }
-
    return {out, program.second};
 }

@@ -87,7 +122,6 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
                                     Maxwell3D::Regs::ShaderStage::Fragment, "fragment")
            .get_value_or({});
    out += R"(
-in vec4 position;
 layout(location = 0) out vec4 FragColor0;
 layout(location = 1) out vec4 FragColor1;
 layout(location = 2) out vec4 FragColor2;
@@ -100,6 +134,7 @@ layout(location = 7) out vec4 FragColor7;
 layout (std140) uniform fs_config {
    vec4 viewport_flip;
    uvec4 instance_id;
+    uvec4 flip_stage;
 };

 void main() {
@@ -110,5 +145,4 @@ void main() {
    out += program.first;
    return {out, program.second};
 }
-
-} // namespace OpenGL::GLShader
+} // namespace OpenGL::GLShader
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -195,6 +195,12 @@ private:
 */
 ProgramResult GenerateVertexShader(const ShaderSetup& setup);

+/**
+ * Generates the GLSL geometry shader program source code for the given GS program
+ * @returns String of the shader source code
+ */
+ProgramResult GenerateGeometryShader(const ShaderSetup& setup);
+
 /**
 * Generates the GLSL fragment shader program source code for the given FS program
 * @returns String of the shader source code
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -18,6 +18,14 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh

    // We only assign the instance to the first component of the vector, the rest is just padding.
    instance_id[0] = state.current_instance;
+
+    // Assign in which stage the position has to be flipped
+    // (the last stage before the fragment shader).
+    if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) {
+        flip_stage[0] = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
+    } else {
+        flip_stage[0] = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
+    }
 }

 } // namespace OpenGL::GLShader
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -21,8 +21,9 @@ struct MaxwellUniformData {
    void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
    alignas(16) GLvec4 viewport_flip;
    alignas(16) GLuvec4 instance_id;
+    alignas(16) GLuvec4 flip_stage;
 };
-static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect");
+static_assert(sizeof(MaxwellUniformData) == 48, "MaxwellUniformData structure size is incorrect");
 static_assert(sizeof(MaxwellUniformData) < 16384,
              "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");

@@ -36,6 +37,10 @@ public:
        vs = program;
    }

+    void UseProgrammableGeometryShader(GLuint program) {
+        gs = program;
+    }
+
    void UseProgrammableFragmentShader(GLuint program) {
        fs = program;
    }
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -161,7 +161,9 @@ struct TICEntry {
        BitField<21, 3, TICHeaderVersion> header_version;
    };
    union {
+        BitField<0, 3, u32> block_width;
        BitField<3, 3, u32> block_height;
+        BitField<6, 3, u32> block_depth;

        // High 16 bits of the pitch value
        BitField<0, 16, u32> pitch_high;
@@ -202,13 +204,24 @@ struct TICEntry {
        return depth_minus_1 + 1;
    }

+    u32 BlockWidth() const {
+        ASSERT(IsTiled());
+        // The block width is stored in log2 format.
+        return 1 << block_width;
+    }
+
    u32 BlockHeight() const {
-        ASSERT(header_version == TICHeaderVersion::BlockLinear ||
-               header_version == TICHeaderVersion::BlockLinearColorKey);
+        ASSERT(IsTiled());
        // The block height is stored in log2 format.
        return 1 << block_height;
    }

+    u32 BlockDepth() const {
+        ASSERT(IsTiled());
+        // The block depth is stored in log2 format.
+        return 1 << block_depth;
+    }
+
    bool IsTiled() const {
        return header_version == TICHeaderVersion::BlockLinear ||
               header_version == TICHeaderVersion::BlockLinearColorKey;
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -169,16 +169,20 @@ static void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr,
    const std::string nice_addr = fmt::format("0x{:016x}", addr);
    std::string object_label;

-    switch (identifier) {
-    case GL_TEXTURE:
-        object_label = extra_info + "@" + nice_addr;
-        break;
-    case GL_PROGRAM:
-        object_label = "ShaderProgram@" + nice_addr;
-        break;
-    default:
-        object_label = fmt::format("Object(0x{:x})@{}", identifier, nice_addr);
-        break;
+    if (extra_info.empty()) {
+        switch (identifier) {
+        case GL_TEXTURE:
+            object_label = "Texture@" + nice_addr;
+            break;
+        case GL_PROGRAM:
+            object_label = "Shader@" + nice_addr;
+            break;
+        default:
+            object_label = fmt::format("Object(0x{:x})@{}", identifier, nice_addr);
+            break;
+        }
+    } else {
+        object_label = extra_info + '@' + nice_addr;
    }
    glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
 }
Author	SHA1	Message	Date
Zach Hilman	f61379f8d2	patch_manager: Move non-Program RomFS patch log to Debug Normal Program-type patches will still be logged to aid in debugging, but for others (mainly Control), it was moved to Debug.	2018-10-12 23:27:19 -04:00
Zach Hilman	90c07e0d33	content_archive: Move get key log to Trace level Avoids printing live keys in the general log.	2018-10-12 23:25:59 -04:00
bunnei	2946d4bdbe	Merge pull request #1467 from ogniK5377/svcbreak-type-fix Fixed incorrect types for svcBreak	2018-10-12 12:08:08 -04:00
bunnei	0f7ab3e21a	Merge pull request #1478 from ogniK5377/remap-invalidhandle-remap Passing an invalid nmap handle to Remap should throw an error	2018-10-12 12:07:14 -04:00
bunnei	f9d03b1d41	Merge pull request #1482 from lioncash/init thread: Remove unnecessary memset from ResetThreadContext()	2018-10-12 12:06:51 -04:00
bunnei	dc328440c8	Merge pull request #1479 from ogniK5377/nmap-revamped Added error codes for nvmap	2018-10-12 12:06:22 -04:00
Lioncash	b492d43e63	thread: Remove unnecessary memset from ResetThreadContext() Regular value initialization is adequate here for zeroing out data. It also has the benefit of not invoking undefined behavior if a non-trivial type is ever added to the struct for whatever reason.	2018-10-12 10:57:31 -04:00
David Marcec	4d2de6564f	Returned an error before processing other remaps	2018-10-12 17:10:41 +11:00
David Marcec	c55b5de0fb	Made the minimum alignment more clear	2018-10-12 17:06:46 +11:00
bunnei	9bf409f275	Merge pull request #1474 from ogniK5377/hwopus-decodeinterleavedwithperformance HwOpus, Implemented DecodeInterleavedWithPerformance	2018-10-11 16:52:13 -04:00
bunnei	3fd26b7147	Merge pull request #1472 from lioncash/san svc: Add missing address range sanitizing checks to MapMemory/UnmapMemory	2018-10-11 16:51:41 -04:00
bunnei	bc293e1751	Merge pull request #1476 from bunnei/fix-unmap-flush nvhost_as_gpu: Flush/invalidate CPU VAddr on UnmapBuffer.	2018-10-11 16:51:28 -04:00
bunnei	83ac3e6395	Merge pull request #1477 from ReinUsesLisp/vmad gl_shader_decompiler: Implement VMAD	2018-10-11 16:51:09 -04:00
David Marcec	c7763603ef	Added error codes for nvmap	2018-10-11 23:06:34 +11:00
David Marcec	5dd538cace	Passing an invalid nmap handle to Remap should throw an error Added error for invalid nmap handles	2018-10-11 20:32:21 +11:00
ReinUsesLisp	17290a4416	gl_shader_decompiler: Implement VMAD	2018-10-11 04:15:10 -03:00
bunnei	bf795edac4	nvhost_as_gpu: Flush CPU VAddr on UnmapBuffer.	2018-10-11 00:19:36 -04:00
David Marcec	fa10905e1e	HwOpus, Implemented DecodeInterleavedWithPerformance Used by sonic ages	2018-10-11 13:06:56 +11:00
bunnei	6d82c4adf9	Merge pull request #1458 from FernandoS27/fix-render-target-block-settings Fixed block height settings for RenderTargets and Depth Buffers	2018-10-10 21:24:07 -04:00
Lioncash	72e9cb523e	svc: Add missing address range sanitizing checks to MapMemory/UnmapMemory This adds the missing address range checking that the service functions do before attempting to map or unmap memory. Given that both service functions perform the same set of checks in the same order, we can wrap these into a function and just call it from both functions, which deduplicates a little bit of code.	2018-10-10 20:30:49 -04:00
bunnei	03ec936ca0	Merge pull request #1460 from FernandoS27/scissor_test Implemented Scissor Testing	2018-10-10 12:04:10 -04:00
bunnei	ee1b204749	Merge pull request #1425 from ReinUsesLisp/geometry-shaders gl_shader_decompiler: Implement geometry shaders	2018-10-10 11:51:29 -04:00
bunnei	68b3d8b7a9	Merge pull request #1469 from lioncash/ptr kernel/thread: Use a regular pointer for the owner/current process	2018-10-10 10:34:20 -04:00
FernandoS27	5f4ee6f0c8	Add memory Layout to Render Targets and Depth Buffers	2018-10-09 22:28:19 -04:00
David Marcec	2db37ddea9	Changed all casts in svc_wrap.h to be static_cast instead	2018-10-10 12:49:08 +11:00
David Marcec	09b6dda8f0	Use a better name than "dont_kill_application" signal_debugger seems like a more fitting name	2018-10-10 12:27:44 +11:00
David Marcec	a4412c8e22	Fixed incorrect types for svcBreak svcBreak reason should be a u32, not a u64.	2018-10-10 12:23:50 +11:00
FernandoS27	af653906d0	Fixed block height settings for RenderTargets and Depth Buffers, and added block width and block depth	2018-10-09 21:14:32 -04:00
FernandoS27	be97fc884d	Implement Scissor Test	2018-10-08 21:36:23 -04:00
FernandoS27	30ff42b8cc	Assert Scissor tests	2018-10-08 20:49:36 -04:00
ReinUsesLisp	7c2d6ef210	gl_shader_decompiler: Move position varying location from 15 to 0 and apply an offset	2018-10-07 17:36:00 -03:00
ReinUsesLisp	ee4d538850	gl_shader_decompiler: Implement geometry shaders	2018-10-07 17:36:00 -03:00
ReinUsesLisp	4d0c682468	video_core: Allow LabelGLObject to use extra info on any object	2018-10-07 17:27:49 -03:00