texture_cache/format_lookup_table: Fix incorrect green, blue, and alpha indices

Previously these were all using the red component to derive the indices, which is definitely not intentional.
Merge pull request #3612 from ReinUsesLisp/red
2020-04-15 23:50:46 -04:00 · 2020-04-15 15:03:49 -04:00 · 2020-04-15 14:40:05 -04:00 · 2020-04-15 14:14:41 -04:00 · 2020-04-15 11:54:50 -04:00 · 2020-04-15 11:43:52 -04:00
20 changed files with 201 additions and 91 deletions
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -53,6 +53,7 @@ if (MSVC)
 else()
    add_compile_options(
        -Wall
+        -Werror=reorder
        -Wno-attributes
    )

--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -103,7 +103,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_

 struct KernelCore::Impl {
    explicit Impl(Core::System& system, KernelCore& kernel)
-        : system{system}, global_scheduler{kernel}, synchronization{system}, time_manager{system} {}
+        : global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {}

    void Initialize(KernelCore& kernel) {
        Shutdown();
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -27,7 +27,7 @@ public:
            {10110, nullptr, "GetFriendProfileImage"},
            {10200, nullptr, "SendFriendRequestForApplication"},
            {10211, nullptr, "AddFacedFriendRequestForApplication"},
-            {10400, nullptr, "GetBlockedUserListIds"},
+            {10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"},
            {10500, nullptr, "GetProfileList"},
            {10600, nullptr, "DeclareOpenOnlinePlaySession"},
            {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"},
@@ -121,6 +121,15 @@ private:
    };
    static_assert(sizeof(SizedFriendFilter) == 0x10, "SizedFriendFilter is an invalid size");

+    void GetBlockedUserListIds(Kernel::HLERequestContext& ctx) {
+        // This is safe to stub, as there should be no adverse consequences from reporting no
+        // blocked users.
+        LOG_WARNING(Service_ACC, "(STUBBED) called");
+        IPC::ResponseBuilder rb{ctx, 3};
+        rb.Push(RESULT_SUCCESS);
+        rb.Push<u32>(0); // Indicates there are no blocked users
+    }
+
    void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) {
        // Stub used by Splatoon 2
        LOG_WARNING(Service_ACC, "(STUBBED) called");
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -303,6 +303,10 @@ public:
                return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
            }

+            bool IsConstant() const {
+                return constant;
+            }
+
            bool IsValid() const {
                return size != Size::Invalid;
            }
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1005,6 +1005,12 @@ union Instruction {
        BitField<46, 2, u64> cache_mode;
    } stg;

+    union {
+        BitField<23, 3, AtomicOp> operation;
+        BitField<48, 1, u64> extended;
+        BitField<20, 3, GlobalAtomicType> type;
+    } red;
+
    union {
        BitField<52, 4, AtomicOp> operation;
        BitField<49, 3, GlobalAtomicType> type;
@@ -1787,6 +1793,7 @@ public:
        ST_S,
        ST,    // Store in generic memory
        STG,   // Store in global memory
+        RED,   // Reduction operation
        ATOM,  // Atomic operation on global memory
        ATOMS, // Atomic operation on shared memory
        AL2P,  // Transforms attribute memory into physical memory
@@ -1871,7 +1878,8 @@ public:
        ICMP_R,
        ICMP_CR,
        ICMP_IMM,
-        FCMP_R,
+        FCMP_RR,
+        FCMP_RC,
        MUFU,  // Multi-Function Operator
        RRO_C, // Range Reduction Operator
        RRO_R,
@@ -2096,6 +2104,7 @@ private:
            INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
            INST("101-------------", Id::ST, Type::Memory, "ST"),
            INST("1110111011011---", Id::STG, Type::Memory, "STG"),
+            INST("1110101111111---", Id::RED, Type::Memory, "RED"),
            INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
            INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
            INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
@@ -2179,7 +2188,8 @@ private:
            INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
            INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
            INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
-            INST("010110111010----", Id::FCMP_R, Type::Arithmetic, "FCMP_R"),
+            INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
+            INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
            INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
            INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
            INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -12,8 +12,9 @@ namespace VideoCommon {

 GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_,
                     std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
-    : GPU(system, std::move(renderer_), true), gpu_thread{system}, gpu_context(std::move(context)),
-      cpu_context(renderer->GetRenderWindow().CreateSharedContext()) {}
+    : GPU(system, std::move(renderer_), true), gpu_thread{system},
+      cpu_context(renderer->GetRenderWindow().CreateSharedContext()),
+      gpu_context(std::move(context)) {}

 GPUAsynch::~GPUAsynch() = default;

--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -140,8 +140,8 @@ void RasterizerOpenGL::SetupVertexFormat() {
        const auto attrib = gpu.regs.vertex_attrib_format[index];
        const auto gl_index = static_cast<GLuint>(index);

-        // Ignore invalid attributes.
-        if (!attrib.IsValid()) {
+        // Disable constant attributes.
+        if (attrib.IsConstant()) {
            glDisableVertexAttribArray(gl_index);
            continue;
        }
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -34,6 +34,8 @@
 namespace OpenGL {

 using Tegra::Engines::ShaderType;
+using VideoCommon::Shader::CompileDepth;
+using VideoCommon::Shader::CompilerSettings;
 using VideoCommon::Shader::ProgramCode;
 using VideoCommon::Shader::Registry;
 using VideoCommon::Shader::ShaderIR;
@@ -43,7 +45,7 @@ namespace {
 constexpr u32 STAGE_MAIN_OFFSET = 10;
 constexpr u32 KERNEL_MAIN_OFFSET = 0;

-constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
+constexpr CompilerSettings COMPILER_SETTINGS{CompileDepth::FullDecompile};

 /// Gets the address for the specified shader stage program
 GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1821,13 +1821,15 @@ private:
    Expression HMergeH0(Operation operation) {
        const std::string dest = VisitOperand(operation, 0).AsUint();
        const std::string src = VisitOperand(operation, 1).AsUint();
-        return {fmt::format("bitfieldInsert({}, {}, 0, 16)", dest, src), Type::Uint};
+        return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
+                Type::HalfFloat};
    }

    Expression HMergeH1(Operation operation) {
        const std::string dest = VisitOperand(operation, 0).AsUint();
        const std::string src = VisitOperand(operation, 1).AsUint();
-        return {fmt::format("bitfieldInsert({}, {}, 16, 16)", dest, src), Type::Uint};
+        return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
+                Type::HalfFloat};
    }

    Expression HPack2(Operation operation) {
@@ -2117,8 +2119,14 @@ private:
            return {};
        }
        return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
-                            Visit(operation[1]).As(type)),
-                type};
+                            Visit(operation[1]).AsUint()),
+                Type::Uint};
+    }
+
+    template <const std::string_view& opname, Type type>
+    Expression Reduce(Operation operation) {
+        code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
+        return {};
    }

    Expression Branch(Operation operation) {
@@ -2477,6 +2485,20 @@ private:
        &GLSLDecompiler::Atomic<Func::Or, Type::Int>,
        &GLSLDecompiler::Atomic<Func::Xor, Type::Int>,

+        &GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
+        &GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
+        &GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
+        &GLSLDecompiler::Reduce<Func::And, Type::Uint>,
+        &GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
+        &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,
+
+        &GLSLDecompiler::Reduce<Func::Add, Type::Int>,
+        &GLSLDecompiler::Reduce<Func::Min, Type::Int>,
+        &GLSLDecompiler::Reduce<Func::Max, Type::Int>,
+        &GLSLDecompiler::Reduce<Func::And, Type::Int>,
+        &GLSLDecompiler::Reduce<Func::Or, Type::Int>,
+        &GLSLDecompiler::Reduce<Func::Xor, Type::Int>,
+
        &GLSLDecompiler::Branch,
        &GLSLDecompiler::BranchIndirect,
        &GLSLDecompiler::PushFlowStack,
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -417,7 +417,7 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {

        switch (params.target) {
        case SurfaceTarget::Texture2DArray:
-            glFramebufferTexture(target, attachment, GetTexture(), params.base_level);
+            glFramebufferTexture(target, attachment, GetTexture(), 0);
            break;
        default:
            UNIMPLEMENTED();
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -315,8 +315,8 @@ public:

 RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
                               Core::Frontend::GraphicsContext& context)
-    : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
-      frame_mailbox{}, context{context}, has_debug_tool{HasDebugTool()} {}
+    : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
+      has_debug_tool{HasDebugTool()} {}

 RendererOpenGL::~RendererOpenGL() = default;

--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -360,6 +360,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
        default:
            break;
        }
+        break;
    case Maxwell::VertexAttribute::Type::UnsignedInt:
        switch (size) {
        case Maxwell::VertexAttribute::Size::Size_8:
@@ -370,6 +371,14 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
            return VK_FORMAT_R8G8B8_UINT;
        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
            return VK_FORMAT_R8G8B8A8_UINT;
+        case Maxwell::VertexAttribute::Size::Size_16:
+            return VK_FORMAT_R16_UINT;
+        case Maxwell::VertexAttribute::Size::Size_16_16:
+            return VK_FORMAT_R16G16_UINT;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16:
+            return VK_FORMAT_R16G16B16_UINT;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return VK_FORMAT_R16G16B16A16_UINT;
        case Maxwell::VertexAttribute::Size::Size_32:
            return VK_FORMAT_R32_UINT;
        case Maxwell::VertexAttribute::Size::Size_32_32:
@@ -381,6 +390,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
        default:
            break;
        }
+        break;
    case Maxwell::VertexAttribute::Type::UnsignedScaled:
        switch (size) {
        case Maxwell::VertexAttribute::Size::Size_8:
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -535,7 +535,9 @@ void VKBlitScreen::CreateGraphicsPipeline() {
    viewport_state_ci.pNext = nullptr;
    viewport_state_ci.flags = 0;
    viewport_state_ci.viewportCount = 1;
+    viewport_state_ci.pViewports = nullptr;
    viewport_state_ci.scissorCount = 1;
+    viewport_state_ci.pScissors = nullptr;

    VkPipelineRasterizationStateCreateInfo rasterization_ci;
    rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -62,13 +62,16 @@ constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::Sha

 VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) {
    const auto& src = regs.viewport_transform[index];
+    const float width = src.scale_x * 2.0f;
+    const float height = src.scale_y * 2.0f;
+
    VkViewport viewport;
    viewport.x = src.translate_x - src.scale_x;
    viewport.y = src.translate_y - src.scale_y;
-    viewport.width = src.scale_x * 2.0f;
-    viewport.height = src.scale_y * 2.0f;
+    viewport.width = width != 0.0f ? width : 1.0f;
+    viewport.height = height != 0.0f ? height : 1.0f;

-    const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
+    const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
    viewport.minDepth = src.translate_z - src.scale_z * reduce_z;
    viewport.maxDepth = src.translate_z + src.scale_z;
    if (!device.IsExtDepthRangeUnrestrictedSupported()) {
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1938,11 +1938,8 @@ private:
        return {};
    }

-    template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type,
-              Type value_type = result_type>
+    template <Id (Module::*func)(Id, Id, Id, Id, Id)>
    Expression Atomic(Operation operation) {
-        const Id type_def = GetTypeDefinition(result_type);
-
        Id pointer;
        if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
            pointer = GetSharedMemoryPointer(*smem);
@@ -1950,15 +1947,19 @@ private:
            pointer = GetGlobalMemoryPointer(*gmem);
        } else {
            UNREACHABLE();
-            return {Constant(type_def, 0), result_type};
+            return {v_float_zero, Type::Float};
        }
-
-        const Id value = As(Visit(operation[1]), value_type);
-
        const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
-        const Id semantics = Constant(type_def, 0);
+        const Id semantics = Constant(t_uint, 0);
+        const Id value = AsUint(Visit(operation[1]));

-        return {(this->*func)(type_def, pointer, scope, semantics, value), result_type};
+        return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
+    }
+
+    template <Id (Module::*func)(Id, Id, Id, Id, Id)>
+    Expression Reduce(Operation operation) {
+        Atomic<func>(operation);
+        return {};
    }

    Expression Branch(Operation operation) {
@@ -2547,21 +2548,35 @@ private:
        &SPIRVDecompiler::AtomicImageXor,
        &SPIRVDecompiler::AtomicImageExchange,

-        &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Uint>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Uint>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin, Type::Uint>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax, Type::Uint>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Uint>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Uint>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Uint>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,

-        &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Int>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Int>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin, Type::Int>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax, Type::Int>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Int>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Int>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Int>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
+
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
+
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,

        &SPIRVDecompiler::Branch,
        &SPIRVDecompiler::BranchIndirect,
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -136,7 +136,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
        SetRegister(bb, instr.gpr0, value);
        break;
    }
-    case OpCode::Id::FCMP_R: {
+    case OpCode::Id::FCMP_RR:
+    case OpCode::Id::FCMP_RC: {
        UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
        Node op_c = GetRegister(instr.gpr39);
        Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -3,7 +3,9 @@
 // Refer to the license.txt file included.

 #include <algorithm>
+#include <utility>
 #include <vector>
+
 #include <fmt/format.h>

 #include "common/alignment.h"
@@ -16,6 +18,7 @@

 namespace VideoCommon::Shader {

+using std::move;
 using Tegra::Shader::AtomicOp;
 using Tegra::Shader::AtomicType;
 using Tegra::Shader::Attribute;
@@ -27,29 +30,26 @@ using Tegra::Shader::StoreType;

 namespace {

-Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) {
-    const OperationCode operation_code = [op] {
-        switch (op) {
-        case AtomicOp::Add:
-            return OperationCode::AtomicIAdd;
-        case AtomicOp::Min:
-            return OperationCode::AtomicIMin;
-        case AtomicOp::Max:
-            return OperationCode::AtomicIMax;
-        case AtomicOp::And:
-            return OperationCode::AtomicIAnd;
-        case AtomicOp::Or:
-            return OperationCode::AtomicIOr;
-        case AtomicOp::Xor:
-            return OperationCode::AtomicIXor;
-        case AtomicOp::Exch:
-            return OperationCode::AtomicIExchange;
-        default:
-            UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
-            return OperationCode::AtomicIAdd;
-        }
-    }();
-    return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data));
+OperationCode GetAtomOperation(AtomicOp op) {
+    switch (op) {
+    case AtomicOp::Add:
+        return OperationCode::AtomicIAdd;
+    case AtomicOp::Min:
+        return OperationCode::AtomicIMin;
+    case AtomicOp::Max:
+        return OperationCode::AtomicIMax;
+    case AtomicOp::And:
+        return OperationCode::AtomicIAnd;
+    case AtomicOp::Or:
+        return OperationCode::AtomicIOr;
+    case AtomicOp::Xor:
+        return OperationCode::AtomicIXor;
+    case AtomicOp::Exch:
+        return OperationCode::AtomicIExchange;
+    default:
+        UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
+        return OperationCode::AtomicIAdd;
+    }
 }

 bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
@@ -90,23 +90,22 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {

 Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
    Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
-    offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
-    return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset),
-                     Immediate(size));
+    offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
+    return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size));
 }

 Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
-    Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
-    offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
-    return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value),
-                     std::move(offset), Immediate(size));
+    Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
+    offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
+    return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset),
+                     Immediate(size));
 }

 Node Sign16Extend(Node value) {
    Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
-    Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15));
+    Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15));
    Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
-    return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend));
+    return Operation(OperationCode::UBitwiseOr, move(value), move(extend));
 }

 } // Anonymous namespace
@@ -379,20 +378,36 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {

            if (IsUnaligned(type)) {
                const u32 mask = GetUnalignedMask(type);
-                value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);
+                value = InsertUnaligned(gmem, move(value), real_address, mask, size);
            }

            bb.push_back(Operation(OperationCode::Assign, gmem, value));
        }
        break;
    }
+    case OpCode::Id::RED: {
+        UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
+        UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add);
+        const auto [real_address, base_address, descriptor] =
+            TrackGlobalMemory(bb, instr, true, true);
+        if (!real_address || !base_address) {
+            // Tracking failed, skip atomic.
+            break;
+        }
+        Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+        Node value = GetRegister(instr.gpr0);
+        bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value)));
+        break;
+    }
    case OpCode::Id::ATOM: {
        UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
                                 instr.atom.operation == AtomicOp::Dec ||
                                 instr.atom.operation == AtomicOp::SafeAdd,
                             "operation={}", static_cast<int>(instr.atom.operation.Value()));
        UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
-                                 instr.atom.type == GlobalAtomicType::U64,
+                                 instr.atom.type == GlobalAtomicType::U64 ||
+                                 instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
+                                 instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
                             "type={}", static_cast<int>(instr.atom.type.Value()));

        const auto [real_address, base_address, descriptor] =
@@ -403,11 +418,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        }

        const bool is_signed =
-            instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
+            instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
        Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
-        Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem,
-                                      GetRegister(instr.gpr20));
-        SetRegister(bb, instr.gpr0, std::move(value));
+        SetRegister(bb, instr.gpr0,
+                    SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
+                                    GetRegister(instr.gpr20)));
        break;
    }
    case OpCode::Id::ATOMS: {
@@ -421,11 +436,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
            instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
        const s32 offset = instr.atoms.GetImmediateOffset();
        Node address = GetRegister(instr.gpr8);
-        address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
-        Node value =
-            GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed,
-                             GetSharedMemory(std::move(address)), GetRegister(instr.gpr20));
-        SetRegister(bb, instr.gpr0, std::move(value));
+        address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
+        SetRegister(bb, instr.gpr0,
+                    SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
+                                    GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
        break;
    }
    case OpCode::Id::AL2P: {
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -178,6 +178,20 @@ enum class OperationCode {
    AtomicIOr,       /// (memory, int) -> int
    AtomicIXor,      /// (memory, int) -> int

+    ReduceUAdd, /// (memory, uint) -> void
+    ReduceUMin, /// (memory, uint) -> void
+    ReduceUMax, /// (memory, uint) -> void
+    ReduceUAnd, /// (memory, uint) -> void
+    ReduceUOr,  /// (memory, uint) -> void
+    ReduceUXor, /// (memory, uint) -> void
+
+    ReduceIAdd, /// (memory, int) -> void
+    ReduceIMin, /// (memory, int) -> void
+    ReduceIMax, /// (memory, int) -> void
+    ReduceIAnd, /// (memory, int) -> void
+    ReduceIOr,  /// (memory, int) -> void
+    ReduceIXor, /// (memory, int) -> void
+
    Branch,         /// (uint branch_target) -> void
    BranchIndirect, /// (uint branch_target) -> void
    PushFlowStack,  /// (uint branch_target) -> void
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -196,9 +196,9 @@ std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb
                                              ComponentType alpha_component) noexcept {
    const auto format_index = static_cast<std::size_t>(format);
    const auto red_index = static_cast<std::size_t>(red_component);
-    const auto green_index = static_cast<std::size_t>(red_component);
-    const auto blue_index = static_cast<std::size_t>(red_component);
-    const auto alpha_index = static_cast<std::size_t>(red_component);
+    const auto green_index = static_cast<std::size_t>(green_component);
+    const auto blue_index = static_cast<std::size_t>(blue_component);
+    const auto alpha_index = static_cast<std::size_t>(alpha_component);
    const std::size_t srgb_index = is_srgb ? 1 : 0;

    return format_index * PerFormat +
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -509,7 +509,9 @@ private:
        }
        const auto& final_params = new_surface->GetSurfaceParams();
        if (cr_params.type != final_params.type) {
-            BufferCopy(current_surface, new_surface);
+            if (Settings::values.use_accurate_gpu_emulation) {
+                BufferCopy(current_surface, new_surface);
+            }
        } else {
            std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
            for (auto& brick : bricks) {
Author	SHA1	Message	Date
Lioncash	636c8ab85b	texture_cache/format_lookup_table: Fix incorrect green, blue, and alpha indices Previously these were all using the red component to derive the indices, which is definitely not intentional.	2020-04-15 23:50:46 -04:00
Fernando Sahmkow	e33196d4e7	Merge pull request #3612 from ReinUsesLisp/red shader/memory: Implement RED.E.ADD and minor changes to ATOM	2020-04-15 15:03:49 -04:00
Mat M	4398bdb4c7	Merge pull request #3670 from lioncash/reorder CMakeLists: Make -Wreorder a compile-time error	2020-04-15 14:40:05 -04:00
Lioncash	213fff67bc	CMakeLists: Make -Wreorder a compile-time error This can result in silent logic bugs within code, and given the amount of times these kind of warnings are caused, they should be flagged at compile-time so no new code is submitted with them.	2020-04-15 14:14:41 -04:00
Mat M	64b5985f0a	Merge pull request #3662 from ReinUsesLisp/constant-attrs gl_rasterizer: Implement constant vertex attributes	2020-04-15 11:54:50 -04:00
Mat M	9208d555b7	Merge pull request #3668 from ReinUsesLisp/vtx-format-16ui maxwell_to_vk: Add uint16 vertex formats	2020-04-15 11:43:52 -04:00
Mat M	ab72696beb	Merge pull request #3656 from ReinUsesLisp/glsl-full-decompile gl_shader_cache: Use CompileDepth::FullDecompile on GLSL	2020-04-15 03:17:46 -04:00
Mat M	4878d6bb49	Merge pull request #3654 from ReinUsesLisp/fix-fb-attach gl_texture_cache: Fix layered texture attachment base level	2020-04-15 03:17:18 -04:00
Mat M	50c0a92db8	Merge pull request #3663 from ReinUsesLisp/fcmp-rc shader/arithmetic: Add FCMP_CR variant	2020-04-15 03:16:56 -04:00
Mat M	13331a3a32	Merge pull request #3664 from ReinUsesLisp/fe3h-black-squares Revert "gl_shader_decompiler: Implement merges with bitfieldInsert"	2020-04-15 03:14:28 -04:00
Mat M	3a759d2352	Merge pull request #3667 from ReinUsesLisp/viewport-trash vk_blit_screen: Initialize all members in VkPipelineViewportStateCreateInfo	2020-04-15 03:10:34 -04:00
ReinUsesLisp	3036067047	maxwell_to_vk: Add uint16 vertex formats	2020-04-15 04:06:30 -03:00
ReinUsesLisp	b4e43c64c8	maxwell_to_vk: Add missing breaks Avoid invalid fallbacks.	2020-04-15 04:05:33 -03:00
ReinUsesLisp	0ca456830f	vk_blit_screen: Initialize all members in VkPipelineViewportStateCreateInfo When the dynamic state is specified, pViewports and pScissors are ignored, quoting the specification: pViewports is a pointer to an array of VkViewport structures, defining the viewport transforms. If the viewport state is dynamic, this member is ignored. That said, AMD's proprietary driver itself seem to read it regardless of what the specification says.	2020-04-15 03:30:08 -03:00
Rodrigo Locatti	0b132e8cc1	Merge pull request #3657 from ReinUsesLisp/viewport-zero vk_rasterizer: Default to 1 viewports with a size of 0	2020-04-15 01:51:17 -03:00
Fernando Sahmkow	daddbeffd1	Texture Cache: Only do buffer copies on accurate GPU. (#3634 ) This is a simple optimization as Buffer Copies are mostly used for texture recycling. They are, however, useful when games abuse undefined behavior but most 3D APIs forbid it.	2020-04-14 23:21:00 -04:00
ReinUsesLisp	fd6371eba7	Revert "gl_shader_decompiler: Implement merges with bitfieldInsert" This reverts commit `05cf270836`. Apparently the first approach using floats instead of bitfieldInert worked better for Fire Emblem: Three Houses. Reverting to get that behavior back.	2020-04-14 21:24:33 -03:00
ReinUsesLisp	fefe7f18f9	shader/arithmetic: Add FCMP_CR variant Adds another variant of FCMP.	2020-04-14 19:11:04 -03:00
Zach Hilman	e366b4ee1f	Merge pull request #3660 from bunnei/friend-blocked-users service: friend: Stub IFriendService::GetBlockedUserListIds.	2020-04-14 16:59:46 -04:00
Zach Hilman	8040f6d544	Merge pull request #3661 from bunnei/patch-manager-fix file_sys: patch_manager: Return early when there are no layers to apply.	2020-04-14 16:59:25 -04:00
ReinUsesLisp	6dfcabc800	gl_rasterizer: Implement constant vertex attributes Credits go to gdkchan from Ryujinx for finding constant attributes are used in retail games.	2020-04-14 17:58:53 -03:00
bunnei	598740f1dd	service: friend: Stub IFriendService::GetBlockedUserListIds. - This is safe to stub, as there should be no adverse consequences from reporting no blocked users.	2020-04-14 16:20:51 -04:00
ReinUsesLisp	37e5c4fa7c	vk_rasterizer: Default to 1 viewports with a size of 0 Silence validation layer errors.	2020-04-14 04:44:34 -03:00
ReinUsesLisp	453d7419d9	gl_shader_cache: Use CompileDepth::FullDecompile on GLSL From my testing on a Splatoon 2 shader that takes 3800ms on average to compile changing to FullDecompile reduces it to 900ms on average. The shader decoder will automatically fallback to a more naive method if it can't use full decompile.	2020-04-14 01:34:20 -03:00
ReinUsesLisp	21dc842171	gl_texture_cache: Fix layered texture attachment base level The base level is already included in the texture view. If we specify the base level in the texture again, this will end up in the incorrect level and potentially out of bounds.	2020-04-13 18:24:56 -03:00
ReinUsesLisp	3185245845	shader/memory: Implement RED.E.ADD Implements a reduction operation. It's an atomic operation that doesn't return a value. This commit introduces another primitive because some shading languages might have a primitive for reduction operations.	2020-04-06 02:24:47 -03:00
ReinUsesLisp	fd0a2b5151	shader/memory: Add "using std::move"	2020-04-06 02:18:14 -03:00
ReinUsesLisp	79970c9174	shader/memory: Minor fixes in ATOM	2020-04-06 00:54:22 -03:00