Port citra #3616

Merge pull request #499 from bunnei/am-stuff
am: Implement CreateStorage, PushInData, etc.
2018-06-04 10:57:18 -05:00 · 2018-06-03 23:43:52 -04:00 · 2018-06-03 22:10:06 -04:00 · 2018-06-03 22:10:06 -04:00 · 2018-06-03 22:10:06 -04:00 · 2018-06-03 22:10:05 -04:00
14 changed files with 318 additions and 92 deletions
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -53,7 +53,7 @@ build_script:
          # https://www.appveyor.com/docs/build-phase
          msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
        } else {
-          C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1'
+          C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -j4 -C mingw_build/ 2>&1'
        }

 after_build:
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -175,6 +175,8 @@ add_library(core STATIC
    hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
    hle/service/nvdrv/devices/nvhost_gpu.cpp
    hle/service/nvdrv/devices/nvhost_gpu.h
+    hle/service/nvdrv/devices/nvhost_nvdec.cpp
+    hle/service/nvdrv/devices/nvhost_nvdec.h
    hle/service/nvdrv/devices/nvmap.cpp
    hle/service/nvdrv/devices/nvmap.h
    hle/service/nvdrv/interface.cpp
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -133,8 +133,11 @@ static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) {

        auto lock_owner = thread->lock_owner;
        // Threads waking up by timeout from WaitProcessWideKey do not perform priority inheritance
-        // and don't have a lock owner.
-        ASSERT(lock_owner == nullptr);
+        // and don't have a lock owner unless SignalProcessWideKey was called first and the thread
+        // wasn't awakened due to the mutex already being acquired.
+        if (lock_owner) {
+            lock_owner->RemoveMutexWaiter(thread);
+        }
    }

    if (resume)
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.

 #include <cinttypes>
+#include <stack>
 #include "core/file_sys/filesystem.h"
 #include "core/hle/ipc_helpers.h"
 #include "core/hle/kernel/event.h"
@@ -348,6 +349,87 @@ void ICommonStateGetter::GetPerformanceMode(Kernel::HLERequestContext& ctx) {
    NGLOG_WARNING(Service_AM, "(STUBBED) called");
 }

+class IStorageAccessor final : public ServiceFramework<IStorageAccessor> {
+public:
+    explicit IStorageAccessor(std::vector<u8> buffer)
+        : ServiceFramework("IStorageAccessor"), buffer(std::move(buffer)) {
+        static const FunctionInfo functions[] = {
+            {0, &IStorageAccessor::GetSize, "GetSize"},
+            {10, &IStorageAccessor::Write, "Write"},
+            {11, &IStorageAccessor::Read, "Read"},
+        };
+        RegisterHandlers(functions);
+    }
+
+private:
+    std::vector<u8> buffer;
+
+    void GetSize(Kernel::HLERequestContext& ctx) {
+        IPC::ResponseBuilder rb{ctx, 4};
+
+        rb.Push(RESULT_SUCCESS);
+        rb.Push(static_cast<u64>(buffer.size()));
+
+        NGLOG_DEBUG(Service_AM, "called");
+    }
+
+    void Write(Kernel::HLERequestContext& ctx) {
+        IPC::RequestParser rp{ctx};
+
+        const u64 offset{rp.Pop<u64>()};
+        const std::vector<u8> data{ctx.ReadBuffer()};
+        // Compare against the remaining space so a huge offset cannot wrap the u64 sum.
+        ASSERT(offset <= buffer.size() && data.size() <= buffer.size() - offset);
+        // data() + offset stays valid when offset == size(); operator[] would not.
+        std::memcpy(buffer.data() + offset, data.data(), data.size());
+
+        IPC::ResponseBuilder rb{rp.MakeBuilder(2, 0, 0)};
+        rb.Push(RESULT_SUCCESS);
+
+        NGLOG_DEBUG(Service_AM, "called, offset={}", offset);
+    }
+
+    void Read(Kernel::HLERequestContext& ctx) {
+        IPC::RequestParser rp{ctx};
+
+        const u64 offset{rp.Pop<u64>()};
+        const size_t size{ctx.GetWriteBufferSize()};
+        // Same overflow-safe form as Write: never add offset and size before comparing.
+        ASSERT(offset <= buffer.size() && size <= buffer.size() - offset);
+
+        ctx.WriteBuffer(buffer.data() + offset, size);
+
+        IPC::ResponseBuilder rb{rp.MakeBuilder(2, 0, 0)};
+        rb.Push(RESULT_SUCCESS);
+
+        NGLOG_DEBUG(Service_AM, "called, offset={}", offset);
+    }
+};
+
+class IStorage final : public ServiceFramework<IStorage> {
+public:
+    explicit IStorage(std::vector<u8> buffer)
+        : ServiceFramework("IStorage"), buffer(std::move(buffer)) {
+        static const FunctionInfo functions[] = {
+            {0, &IStorage::Open, "Open"},
+            {1, nullptr, "OpenTransferStorage"},
+        };
+        RegisterHandlers(functions);
+    }
+
+private:
+    std::vector<u8> buffer;
+    // Replies with success and a new IStorageAccessor interface built from `buffer`.
+    void Open(Kernel::HLERequestContext& ctx) {
+        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+        // NOTE(review): accessor takes the vector by value - likely a detached copy; confirm.
+        rb.Push(RESULT_SUCCESS);
+        rb.PushIpcInterface<AM::IStorageAccessor>(buffer);
+
+        NGLOG_DEBUG(Service_AM, "called");
+    }
+};
+
 class ILibraryAppletAccessor final : public ServiceFramework<ILibraryAppletAccessor> {
 public:
    explicit ILibraryAppletAccessor() : ServiceFramework("ILibraryAppletAccessor") {
@@ -359,7 +441,7 @@ public:
            {25, nullptr, "Terminate"},
            {30, nullptr, "GetResult"},
            {50, nullptr, "SetOutOfFocusApplicationSuspendingEnabled"},
-            {100, nullptr, "PushInData"},
+            {100, &ILibraryAppletAccessor::PushInData, "PushInData"},
            {101, nullptr, "PopOutData"},
            {102, nullptr, "PushExtraStorage"},
            {103, nullptr, "PushInteractiveInData"},
@@ -388,6 +470,17 @@ private:
        NGLOG_WARNING(Service_AM, "(STUBBED) called");
    }

+    void PushInData(Kernel::HLERequestContext& ctx) {
+        IPC::RequestParser rp{ctx};
+        // Keep the IStorage interface popped from the request on storage_stack.
+        storage_stack.push(rp.PopIpcInterface<AM::IStorage>());
+        IPC::ResponseBuilder rb{rp.MakeBuilder(2, 0, 0)};
+        rb.Push(RESULT_SUCCESS);
+
+        NGLOG_DEBUG(Service_AM, "called");
+    }
+
+    std::stack<std::shared_ptr<AM::IStorage>> storage_stack;
    Kernel::SharedPtr<Kernel::Event> state_changed_event;
 };

@@ -396,7 +489,7 @@ ILibraryAppletCreator::ILibraryAppletCreator() : ServiceFramework("ILibraryApple
        {0, &ILibraryAppletCreator::CreateLibraryApplet, "CreateLibraryApplet"},
        {1, nullptr, "TerminateAllLibraryApplets"},
        {2, nullptr, "AreAnyLibraryAppletsLeft"},
-        {10, nullptr, "CreateStorage"},
+        {10, &ILibraryAppletCreator::CreateStorage, "CreateStorage"},
        {11, nullptr, "CreateTransferMemoryStorage"},
        {12, nullptr, "CreateHandleStorage"},
    };
@@ -412,72 +505,17 @@ void ILibraryAppletCreator::CreateLibraryApplet(Kernel::HLERequestContext& ctx)
    NGLOG_DEBUG(Service_AM, "called");
 }

-class IStorageAccessor final : public ServiceFramework<IStorageAccessor> {
-public:
-    explicit IStorageAccessor(std::vector<u8> buffer)
-        : ServiceFramework("IStorageAccessor"), buffer(std::move(buffer)) {
-        static const FunctionInfo functions[] = {
-            {0, &IStorageAccessor::GetSize, "GetSize"},
-            {10, nullptr, "Write"},
-            {11, &IStorageAccessor::Read, "Read"},
-        };
-        RegisterHandlers(functions);
-    }
+void ILibraryAppletCreator::CreateStorage(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp{ctx};
+    const u64 size{rp.Pop<u64>()};
+    std::vector<u8> buffer(size);

-private:
-    std::vector<u8> buffer;
+    IPC::ResponseBuilder rb{rp.MakeBuilder(2, 0, 1)};
+    rb.Push(RESULT_SUCCESS);
+    rb.PushIpcInterface<AM::IStorage>(std::move(buffer));

-    void GetSize(Kernel::HLERequestContext& ctx) {
-        IPC::ResponseBuilder rb{ctx, 4};
-
-        rb.Push(RESULT_SUCCESS);
-        rb.Push(static_cast<u64>(buffer.size()));
-
-        NGLOG_DEBUG(Service_AM, "called");
-    }
-
-    void Read(Kernel::HLERequestContext& ctx) {
-        IPC::RequestParser rp{ctx};
-
-        u64 offset = rp.Pop<u64>();
-
-        const size_t size{ctx.GetWriteBufferSize()};
-
-        ASSERT(offset + size <= buffer.size());
-
-        ctx.WriteBuffer(buffer.data() + offset, size);
-
-        IPC::ResponseBuilder rb{ctx, 2};
-
-        rb.Push(RESULT_SUCCESS);
-
-        NGLOG_DEBUG(Service_AM, "called");
-    }
-};
-
-class IStorage final : public ServiceFramework<IStorage> {
-public:
-    explicit IStorage(std::vector<u8> buffer)
-        : ServiceFramework("IStorage"), buffer(std::move(buffer)) {
-        static const FunctionInfo functions[] = {
-            {0, &IStorage::Open, "Open"},
-            {1, nullptr, "OpenTransferStorage"},
-        };
-        RegisterHandlers(functions);
-    }
-
-private:
-    std::vector<u8> buffer;
-
-    void Open(Kernel::HLERequestContext& ctx) {
-        IPC::ResponseBuilder rb{ctx, 2, 0, 1};
-
-        rb.Push(RESULT_SUCCESS);
-        rb.PushIpcInterface<AM::IStorageAccessor>(buffer);
-
-        NGLOG_DEBUG(Service_AM, "called");
-    }
-};
+    NGLOG_DEBUG(Service_AM, "called, size={}", size);
+}

 IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationFunctions") {
    static const FunctionInfo functions[] = {
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -121,6 +121,7 @@ public:

 private:
    void CreateLibraryApplet(Kernel::HLERequestContext& ctx);
+    void CreateStorage(Kernel::HLERequestContext& ctx);
 };

 class IApplicationFunctions final : public ServiceFramework<IApplicationFunctions> {
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -0,0 +1,32 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
+
+namespace Service::Nvidia::Devices {
+
+u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
+    NGLOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
+                command.raw, input.size(), output.size());
+    switch (static_cast<IoctlCommand>(command.raw)) {
+    case IoctlCommand::IocSetNVMAPfdCommand:
+        return SetNVMAPfd(input, output);
+    }
+    UNIMPLEMENTED_MSG("Unimplemented ioctl");
+    return 0;
+}
+
+u32 nvhost_nvdec::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) {
+    IoctlSetNvmapFD params{};
+    // The input length is caller-supplied: clamp it so the copy cannot run past params.
+    const size_t copy_size = input.size() < sizeof(params) ? input.size() : sizeof(params);
+    std::memcpy(&params, input.data(), copy_size);
+    NGLOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
+    nvmap_fd = params.nvmap_fd;
+    return 0;
+}
+
+} // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -0,0 +1,38 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstdlib>
+#include <cstring>
+#include <vector>
+#include "common/common_types.h"
+#include "core/hle/service/nvdrv/devices/nvdevice.h"
+
+namespace Service::Nvidia::Devices {
+
+class nvhost_nvdec final : public nvdevice {
+public:
+    nvhost_nvdec() = default;
+    ~nvhost_nvdec() override = default;
+    // Entry point for ioctl requests sent to this device.
+    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
+
+private:
+    enum class IoctlCommand : u32_le {
+        IocSetNVMAPfdCommand = 0x40044801,
+    };
+    // Layout that the SetNVMAPfd input bytes are copied into.
+    struct IoctlSetNvmapFD {
+        u32_le nvmap_fd;
+    };
+    static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size");
+    // Last fd received through SetNVMAPfd; value-initialised until then.
+    u32_le nvmap_fd{};
+    // Handler for IocSetNVMAPfdCommand.
+    u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output);
+};
+
+} // namespace Service::Nvidia::Devices
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -9,6 +9,7 @@
 #include "core/hle/service/nvdrv/devices/nvhost_ctrl.h"
 #include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h"
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
+#include "core/hle/service/nvdrv/devices/nvhost_nvdec.h"
 #include "core/hle/service/nvdrv/devices/nvmap.h"
 #include "core/hle/service/nvdrv/interface.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
@@ -36,6 +37,7 @@ Module::Module() {
    devices["/dev/nvmap"] = nvmap_dev;
    devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev);
    devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>();
+    devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>();
 }

 u32 Module::Open(std::string device_name) {
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -156,16 +156,15 @@ void Maxwell3D::ProcessQueryGet() {
    // TODO(Subv): Support the other query units.
    ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
               "Units other than CROP are unimplemented");
-    ASSERT_MSG(regs.query.query_get.short_query,
-               "Writing the entire query result structure is unimplemented");

    u32 value = Memory::Read32(*address);
-    u32 result = 0;
+    u64 result = 0;

    // TODO(Subv): Support the other query variables
    switch (regs.query.query_get.select) {
    case Regs::QuerySelect::Zero:
-        result = 0;
+        // This seems to actually write the query sequence to the query address.
+        result = regs.query.query_sequence;
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented query select type {}",
@@ -174,15 +173,31 @@ void Maxwell3D::ProcessQueryGet() {

    // TODO(Subv): Research and implement how query sync conditions work.

+    struct LongQueryResult {
+        u64_le value;
+        u64_le timestamp;
+    };
+    static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
+
    switch (regs.query.query_get.mode) {
    case Regs::QueryMode::Write:
    case Regs::QueryMode::Write2: {
-        // Write the current query sequence to the sequence address.
        u32 sequence = regs.query.query_sequence;
-        Memory::Write32(*address, sequence);
-
-        // TODO(Subv): Write the proper query response structure to the address when not using short
-        // mode.
+        if (regs.query.query_get.short_query) {
+            // Write the current query sequence to the sequence address.
+            // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
+            // query.
+            Memory::Write32(*address, sequence);
+        } else {
+            // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
+            // GPU, this command may actually take a while to complete in real hardware due to GPU
+            // wait queues.
+            LongQueryResult query_result{};
+            query_result.value = result;
+            // TODO(Subv): Generate a real GPU timestamp and write it here instead of 0
+            query_result.timestamp = 0;
+            Memory::WriteBlock(*address, &query_result, sizeof(query_result));
+        }
        break;
    }
    default:
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -261,6 +261,33 @@ union Instruction {
        BitField<50, 1, u64> saturate_a;
    } conversion;

+    union {
+        BitField<31, 4, u64> component_mask;
+        // True when bit `component` of component_mask is set.
+        bool IsComponentEnabled(size_t component) const {
+            return ((1 << component) & component_mask) != 0;
+        }
+    } tex;
+
+    union {
+        BitField<50, 3, u64> component_mask_selector;
+        BitField<28, 8, Register> gpr28;
+        // A second destination is in use whenever gpr28 differs from Register::ZeroIndex.
+        bool HasTwoDestinations() const {
+            return gpr28.Value() != Register::ZeroIndex;
+        }
+        // Picks the mask indexed by component_mask_selector and tests bit `component` of it.
+        bool IsComponentEnabled(size_t component) const {
+            static constexpr std::array<size_t, 5> one_dest_mask{0x1, 0x2, 0x4, 0x8, 0x3};
+            static constexpr std::array<size_t, 5> two_dest_mask{0x7, 0xb, 0xd, 0xe, 0xf};
+            const auto& mask{HasTwoDestinations() ? two_dest_mask : one_dest_mask};
+
+            ASSERT(component_mask_selector < mask.size());
+
+            return ((1 << component) & mask[component_mask_selector]) != 0;
+        }
+    } texs;
+
    BitField<61, 1, u64> is_b_imm;
    BitField<60, 1, u64> is_b_gpr;
    BitField<59, 1, u64> is_c_gpr;
@@ -281,6 +308,7 @@ public:
        KIL,
        LD_A,
        ST_A,
+        TEX,
        TEXQ, // Texture Query
        TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
        TLDS, // Texture Load with scalar/non-vec4 source/destinations
@@ -297,8 +325,10 @@ public:
        FMUL_R,
        FMUL_IMM,
        FMUL32_IMM,
-        MUFU, // Multi-Function Operator
-        RRO,  // Range Reduction Operator
+        MUFU,  // Multi-Function Operator
+        RRO_C, // Range Reduction Operator
+        RRO_R,
+        RRO_IMM,
        F2F_C,
        F2F_R,
        F2F_IMM,
@@ -442,6 +472,7 @@ private:
            INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
            INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
            INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
+            INST("1100000000111---", Id::TEX, Type::Memory, "TEX"),
            INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
            INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
            INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
@@ -459,7 +490,9 @@ private:
            INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
            INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"),
            INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
-            INST("0101110010010---", Id::RRO, Type::Arithmetic, "RRO"),
+            INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
+            INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
+            INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"),
            INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
            INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
            INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -53,6 +53,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
    {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true},   // DXT1
    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
+    {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true},           // DXN1
 }};

 static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
@@ -113,7 +114,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
        MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
        MortonCopy<true, PixelFormat::R8>,          MortonCopy<true, PixelFormat::RGBA16F>,
        MortonCopy<true, PixelFormat::DXT1>,        MortonCopy<true, PixelFormat::DXT23>,
-        MortonCopy<true, PixelFormat::DXT45>,
+        MortonCopy<true, PixelFormat::DXT45>,       MortonCopy<true, PixelFormat::DXN1>,
 };

 static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
@@ -126,7 +127,8 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
        MortonCopy<false, PixelFormat::A1B5G5R5>,
        MortonCopy<false, PixelFormat::R8>,
        MortonCopy<false, PixelFormat::RGBA16F>,
-        // TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported
+        // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported
+        nullptr,
        nullptr,
        nullptr,
        nullptr,
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -63,6 +63,7 @@ struct SurfaceParams {
        DXT1 = 6,
        DXT23 = 7,
        DXT45 = 8,
+        DXN1 = 9, // This is also known as BC4

        Max,
        Invalid = 255,
@@ -107,6 +108,7 @@ struct SurfaceParams {
            4, // DXT1
            4, // DXT23
            4, // DXT45
+            4, // DXN1
        }};

        ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -130,6 +132,7 @@ struct SurfaceParams {
            64,  // DXT1
            128, // DXT23
            128, // DXT45
+            64,  // DXN1
        }};

        ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -185,6 +188,8 @@ struct SurfaceParams {
            return PixelFormat::DXT23;
        case Tegra::Texture::TextureFormat::DXT45:
            return PixelFormat::DXT45;
+        case Tegra::Texture::TextureFormat::DXN1:
+            return PixelFormat::DXN1;
        default:
            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
            UNREACHABLE();
@@ -212,6 +217,8 @@ struct SurfaceParams {
            return Tegra::Texture::TextureFormat::DXT23;
        case PixelFormat::DXT45:
            return Tegra::Texture::TextureFormat::DXT45;
+        case PixelFormat::DXN1:
+            return Tegra::Texture::TextureFormat::DXN1;
        default:
            UNREACHABLE();
        }
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -792,8 +792,13 @@ private:
                                        1, 1);
                break;
            }
-            case OpCode::Id::RRO: {
-                NGLOG_DEBUG(HW_GPU, "Skipping RRO instruction");
+            case OpCode::Id::RRO_C:
+            case OpCode::Id::RRO_R:
+            case OpCode::Id::RRO_IMM: {
+                // Currently RRO is only implemented as a register move.
+                // Usage of `abs_b` and `negate_b` here should also be correct.
+                regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
+                NGLOG_WARNING(HW_GPU, "RRO instruction is incomplete");
                break;
            }
            default: {
@@ -891,10 +896,10 @@ private:
                                                  instr.gpr0);
                break;
            }
-            case OpCode::Id::TEXS: {
+            case OpCode::Id::TEX: {
                ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
                const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
-                const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
+                const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
                const std::string sampler = GetSampler(instr.sampler);
                const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
                // Add an extra scope and declare the texture coords inside to prevent overwriting
@@ -903,8 +908,52 @@ private:
                ++shader.scope;
                shader.AddLine(coord);
                const std::string texture = "texture(" + sampler + ", coords)";
-                for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
-                    regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, elem);
+
+                size_t dest_elem{};
+                for (size_t elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
+                    if (!instr.tex.IsComponentEnabled(elem)) {
+                        // Skip disabled components
+                        continue;
+                    }
+                    regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem);
+                    ++dest_elem;
+                }
+                --shader.scope;
+                shader.AddLine("}");
+                break;
+            }
+            case OpCode::Id::TEXS: {
+                ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
+                const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+                const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
+                const std::string sampler = GetSampler(instr.sampler);
+                const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+                // Add an extra scope and declare the texture coords inside to prevent
+                // overwriting them in case they are used as outputs of the texs instruction.
+                shader.AddLine("{");
+                ++shader.scope;
+                shader.AddLine(coord);
+                const std::string texture = "texture(" + sampler + ", coords)";
+
+                // TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA goes
+                // into gpr28+0 and gpr28+1
+                size_t offset{};
+
+                for (const auto& dest : {instr.gpr0.Value(), instr.gpr28.Value()}) {
+                    for (unsigned elem = 0; elem < 2; ++elem) {
+                        if (!instr.texs.IsComponentEnabled(elem)) {
+                            // Skip disabled components
+                            continue;
+                        }
+                        regs.SetRegisterToFloat(dest, elem + offset, texture, 1, 4, false, elem);
+                    }
+
+                    if (!instr.texs.HasTwoDestinations()) {
+                        // Skip the second destination
+                        break;
+                    }
+
+                    offset += 2;
                }
                --shader.scope;
                shader.AddLine("}");
@@ -961,8 +1010,8 @@ private:
                         '(' + predicate + ") " + combiner + " (" + second_pred + ')');

            if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
-                // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
-                // enabled
+                // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+                // if enabled
                SetPredicate(instr.fsetp.pred0,
                             "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
            }
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -46,6 +46,7 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_
 u32 BytesPerPixel(TextureFormat format) {
    switch (format) {
    case TextureFormat::DXT1:
+    case TextureFormat::DXN1:
        // In this case a 'pixel' actually refers to a 4x4 tile.
        return 8;
    case TextureFormat::DXT23:
@@ -79,7 +80,9 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
    case TextureFormat::DXT1:
    case TextureFormat::DXT23:
    case TextureFormat::DXT45:
-        // In the DXT formats, each 4x4 tile is swizzled instead of just individual pixel values.
+    case TextureFormat::DXN1:
+        // In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel
+        // values.
        CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
                         unswizzled_data.data(), true, block_height);
        break;
@@ -109,6 +112,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
    case TextureFormat::DXT1:
    case TextureFormat::DXT23:
    case TextureFormat::DXT45:
+    case TextureFormat::DXN1:
    case TextureFormat::A8R8G8B8:
    case TextureFormat::A2B10G10R10:
    case TextureFormat::A1B5G5R5:
Author	SHA1	Message	Date
Valentin Vanelslande	5c82400ef8	Port citra #3616	2018-06-04 10:57:18 -05:00
bunnei	049ce242a4	Merge pull request #499 from bunnei/am-stuff am: Implement CreateStorage, PushInData, etc.	2018-06-03 23:43:52 -04:00
bunnei	876b805e50	am: Implement ILibraryAppletAccessor::PushInData.	2018-06-03 22:10:06 -04:00
bunnei	2dcb98226b	am: Implement IStorageAccessor::Write.	2018-06-03 22:10:06 -04:00
bunnei	9fedfbe141	am: Cleanup IStorageAccessor::Read.	2018-06-03 22:10:06 -04:00
bunnei	d73c22bf4d	am: Implement ILibraryAppletCreator::CreateStorage.	2018-06-03 22:10:05 -04:00
bunnei	ba117854f9	Merge pull request #500 from Subv/long_queries GPU: Partial implementation of long GPU queries.	2018-06-03 21:24:50 -04:00
bunnei	527c098ff6	Merge pull request #498 from bunnei/texs-mask gl_shader_decompiler: Implement TEXS component mask.	2018-06-03 21:22:12 -04:00
Subv	d57333406d	GPU: Partial implementation of long GPU queries. Long queries write a 128-bit result value to memory, which consists of a 64 bit query value and a 64 bit timestamp. In this implementation, only select=Zero of the Crop unit is implemented, this writes the query sequence as a 64 bit value, and a 0u64 value for the timestamp, since we emulate an infinitely fast GPU. This specific type was hwtested, but more rigorous tests should be performed in the future for the other types.	2018-06-03 19:17:31 -05:00
bunnei	1efcba346a	gl_shader_decompiler: Implement TEXS component mask.	2018-06-03 12:08:17 -04:00
bunnei	bb9d39b8fe	Merge pull request #494 from bunnei/shader-tex gl_shader_decompiler: Implement TEX, fixes for TEXS.	2018-06-03 12:05:38 -04:00
bunnei	27c0f9e02d	Merge pull request #495 from bunnei/improve-rro gl_shader_decompiler: Implement RRO as a register move.	2018-06-03 12:05:26 -04:00
bunnei	41faeeeb03	Merge pull request #484 from mailwl/nvhost-nvdec Services/nvdrv: add '/dev/nvhost-nvdec' device	2018-06-03 11:18:00 -04:00
bunnei	63270e588b	Merge pull request #496 from Subv/waitprocesswidekey_timeout Kernel/Threads: A thread waking up by timeout from a WaitProcessWideKey may already have an assigned lock owner.	2018-06-03 11:15:49 -04:00
bunnei	e54ea773fc	gl_shader_decompiler: Implement RRO as a register move.	2018-06-03 11:14:31 -04:00
bunnei	0d64ddc6dd	Merge pull request #497 from Subv/dxn1 GPU: Implemented the DXN1 (BC4) texture format.	2018-06-03 11:05:04 -04:00
Subv	9cd87a6352	Kernel/Threads: A thread waking up by timeout from a WaitProcessWideKey may already have an assigned lock owner. This situation may happen like so: Thread 1 with low priority calls WaitProcessWideKey with timeout. Thread 2 with high priority calls WaitProcessWideKey without timeout. Thread 3 calls SignalProcessWideKey - Thread 2 acquires the lock and awakens. - Thread 1 can't acquire the lock and is put to sleep with the lock owner being Thread 2. Thread 1's timeout expires, with the lock owner still being set to Thread 2.	2018-06-02 14:06:35 -05:00
Subv	99f9d47d16	GPU: Implemented the DXN1 (BC4) texture format.	2018-06-02 13:17:09 -05:00
bunnei	bbbe34429e	Merge pull request #492 from mailwl/time Service/time: implement posix time to calendar conversion	2018-06-01 10:14:20 -04:00
bunnei	888eb345c0	gl_shader_decompiler: Implement TEX instruction.	2018-05-31 23:36:45 -04:00
bunnei	4c727d0ba8	gl_shader_decompiler: Support multi-destination for TEXS.	2018-05-31 22:57:32 -04:00
mailwl	9a273bb23b	Services/nvdrv: add '/dev/nvhost-nvdec' device	2018-05-30 12:49:28 +03:00