From c96da97630e6c9746bd0a3ef62d8e54364bf1281 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 30 Jun 2018 03:00:39 -0400
Subject: [PATCH 1/4] gl_shader_decompiler: Implement predicate
 NotEqualWithNan.

---
 src/video_core/engines/shader_bytecode.h      |  1 +
 .../renderer_opengl/gl_shader_decompiler.cpp  | 40 +++++++++++--------
 2 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index cb4db06792..0527fc376b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -142,6 +142,7 @@ enum class PredCondition : u64 {
     GreaterThan = 4,
     NotEqual = 5,
     GreaterEqual = 6,
+    NotEqualWithNan = 13,
     // TODO(Subv): Other condition types
 };
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 46eaad0217..3ef79a5e7a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -719,21 +719,31 @@ private:
     /**
      * Returns the comparison string to use to compare two values in the 'set' family of
      * instructions.
-     * @params condition The condition used in the 'set'-family instruction.
+     * @param condition The condition used in the 'set'-family instruction.
+     * @param op_a First operand to use for the comparison.
+     * @param op_b Second operand to use for the comparison.
-     * @returns String corresponding to the GLSL operator that matches the desired comparison.
+     * @returns String with the GLSL expression that compares op_a and op_b using the condition.
      */
-    std::string GetPredicateComparison(Tegra::Shader::PredCondition condition) const {
+    std::string GetPredicateComparison(Tegra::Shader::PredCondition condition,
+                                       const std::string& op_a, const std::string& op_b) const {
         using Tegra::Shader::PredCondition;
         static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
-            {PredCondition::LessThan, "<"},   {PredCondition::Equal, "=="},
-            {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"},
-            {PredCondition::NotEqual, "!="},  {PredCondition::GreaterEqual, ">="},
+            {PredCondition::LessThan, "<"},         {PredCondition::Equal, "=="},
+            {PredCondition::LessEqual, "<="},       {PredCondition::GreaterThan, ">"},
+            {PredCondition::NotEqual, "!="},        {PredCondition::GreaterEqual, ">="},
+            {PredCondition::NotEqualWithNan, "!="},
         };
 
-        auto comparison = PredicateComparisonStrings.find(condition);
+        const auto& comparison{PredicateComparisonStrings.find(condition)};
         ASSERT_MSG(comparison != PredicateComparisonStrings.end(),
                    "Unknown predicate comparison operation");
-        return comparison->second;
+
+        std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'};
+        if (condition == PredCondition::NotEqualWithNan) {
+            predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
+        }
+
+        return predicate;
     }
 
     /**
@@ -1415,10 +1425,9 @@ private:
             std::string second_pred =
                 GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
 
-            std::string comparator = GetPredicateComparison(instr.fsetp.cond);
             std::string combiner = GetPredicateCombiner(instr.fsetp.op);
 
-            std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')';
+            std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
             // Set the primary predicate to the result of Predicate OP SecondPredicate
             SetPredicate(instr.fsetp.pred3,
                          '(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1453,10 +1462,9 @@ private:
             std::string second_pred =
                 GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0);
 
-            std::string comparator = GetPredicateComparison(instr.isetp.cond);
             std::string combiner = GetPredicateCombiner(instr.isetp.op);
 
-            std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')';
+            std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
             // Set the primary predicate to the result of Predicate OP SecondPredicate
             SetPredicate(instr.isetp.pred3,
                          '(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1503,11 +1511,10 @@ private:
             std::string second_pred =
                 GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
 
-            std::string comparator = GetPredicateComparison(instr.fset.cond);
             std::string combiner = GetPredicateCombiner(instr.fset.op);
 
-            std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
-                                    combiner + " (" + second_pred + "))";
+            std::string predicate = "((" + GetPredicateComparison(instr.fset.cond, op_a, op_b) +
+                                    ") " + combiner + " (" + second_pred + "))";
 
             if (instr.fset.bf) {
                 regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -1538,11 +1545,10 @@ private:
             std::string second_pred =
                 GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
 
-            std::string comparator = GetPredicateComparison(instr.iset.cond);
             std::string combiner = GetPredicateCombiner(instr.iset.op);
 
-            std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
-                                    combiner + " (" + second_pred + "))";
+            std::string predicate = "((" + GetPredicateComparison(instr.iset.cond, op_a, op_b) +
+                                    ") " + combiner + " (" + second_pred + "))";
 
             if (instr.iset.bf) {
                 regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);

From b11072d54a4e324fc6ad380f232c1de8bcf9ab63 Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Sat, 30 Jun 2018 14:08:51 -0500
Subject: [PATCH 2/4] GLCache: Specify the component type along with the
 texture type in the format tuple.

---
 .../renderer_opengl/gl_rasterizer_cache.cpp   | 38 ++++++++++---------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 63f5999eae..2864a7c8e4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -27,6 +27,7 @@ struct FormatTuple {
     GLint internal_format;
     GLenum format;
     GLenum type;
+    ComponentType component_type;
     bool compressed;
 };
 
@@ -65,29 +66,32 @@ struct FormatTuple {
 }
 
 static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                    // ABGR8
-    {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false},                       // B5G6R5
-    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false},              // A2B10G10R10
-    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false},                // A1B5G5R5
-    {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false},                                   // R8
-    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false},                                // RGBA16F
-    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false},        // R11FG11FB10F
-    {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true},   // DXT1
-    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
-    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
-    {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true},           // DXN1
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                               // ASTC_2D_4X4
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8
+    {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false},    // B5G6R5
+    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
+     false}, // A2B10G10R10
+    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5
+    {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},                    // R8
+    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false},                 // RGBA16F
+    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
+     false}, // R11FG11FB10F
+    {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+     true}, // DXT1
+    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+     true}, // DXT23
+    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+     true},                                                                                 // DXT45
+    {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
 }};
 
 static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
     const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
     if (type == SurfaceType::ColorTexture) {
         ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
-        // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which
-        // are type FLOAT
-        ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F ||
-               pixel_format == PixelFormat::R11FG11FB10F);
-        return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
+        auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)];
+        ASSERT(component_type == format.component_type);
+        return format;
     } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
         // TODO(Subv): Implement depth formats
         ASSERT_MSG(false, "Unimplemented");

From c0e2d5275814e6d3fe15d1b8abb0f057c0e5d155 Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Sat, 30 Jun 2018 14:23:13 -0500
Subject: [PATCH 3/4] GPU: Implemented the RGBA32_UINT rendertarget format.

---
 src/video_core/gpu.h                          |  1 +
 .../renderer_opengl/gl_rasterizer_cache.cpp   | 11 ++++++----
 .../renderer_opengl/gl_rasterizer_cache.h     | 21 ++++++++++++++-----
 src/video_core/textures/decoders.cpp          |  4 ++++
 4 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 7b4e9b8423..d0a4ac2671 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -16,6 +16,7 @@ namespace Tegra {
 enum class RenderTargetFormat : u32 {
     NONE = 0x0,
     RGBA32_FLOAT = 0xC0,
+    RGBA32_UINT = 0xC2,
     RGBA16_FLOAT = 0xCA,
     RGB10_A2_UNORM = 0xD1,
     RGBA8_UNORM = 0xD5,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 2864a7c8e4..ae48378f31 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -74,7 +74,8 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
     {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},                    // R8
     {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false},                 // RGBA16F
     {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
-     false}, // R11FG11FB10F
+     false},                                                                     // R11FG11FB10F
+    {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
     {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
      true}, // DXT1
     {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
@@ -170,9 +171,10 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
         MortonCopy<true, PixelFormat::ABGR8>,        MortonCopy<true, PixelFormat::B5G6R5>,
         MortonCopy<true, PixelFormat::A2B10G10R10>,  MortonCopy<true, PixelFormat::A1B5G5R5>,
         MortonCopy<true, PixelFormat::R8>,           MortonCopy<true, PixelFormat::RGBA16F>,
-        MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>,
-        MortonCopy<true, PixelFormat::DXT23>,        MortonCopy<true, PixelFormat::DXT45>,
-        MortonCopy<true, PixelFormat::DXN1>,         MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
+        MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>,
+        MortonCopy<true, PixelFormat::DXT1>,         MortonCopy<true, PixelFormat::DXT23>,
+        MortonCopy<true, PixelFormat::DXT45>,        MortonCopy<true, PixelFormat::DXN1>,
+        MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
 };
 
 static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
@@ -185,6 +187,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
         MortonCopy<false, PixelFormat::R8>,
         MortonCopy<false, PixelFormat::RGBA16F>,
         MortonCopy<false, PixelFormat::R11FG11FB10F>,
+        MortonCopy<false, PixelFormat::RGBA32UI>,
         // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported
         nullptr,
         nullptr,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 85e7c88889..99be250b44 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -30,11 +30,12 @@ struct SurfaceParams {
         R8 = 4,
         RGBA16F = 5,
         R11FG11FB10F = 6,
-        DXT1 = 7,
-        DXT23 = 8,
-        DXT45 = 9,
-        DXN1 = 10, // This is also known as BC4
-        ASTC_2D_4X4 = 11,
+        RGBA32UI = 7,
+        DXT1 = 8,
+        DXT23 = 9,
+        DXT45 = 10,
+        DXN1 = 11, // This is also known as BC4
+        ASTC_2D_4X4 = 12,
 
         Max,
         Invalid = 255,
@@ -77,6 +78,7 @@ struct SurfaceParams {
             1, // R8
             1, // RGBA16F
             1, // R11FG11FB10F
+            1, // RGBA32UI
             4, // DXT1
             4, // DXT23
             4, // DXT45
@@ -100,6 +102,7 @@ struct SurfaceParams {
             8,   // R8
             64,  // RGBA16F
             32,  // R11FG11FB10F
+            128, // RGBA32UI
             64,  // DXT1
             128, // DXT23
             128, // DXT45
@@ -125,6 +128,8 @@ struct SurfaceParams {
             return PixelFormat::RGBA16F;
         case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
             return PixelFormat::R11FG11FB10F;
+        case Tegra::RenderTargetFormat::RGBA32_UINT:
+            return PixelFormat::RGBA32UI;
         default:
             NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
@@ -148,6 +153,8 @@ struct SurfaceParams {
             return PixelFormat::RGBA16F;
         case Tegra::Texture::TextureFormat::BF10GF11RF11:
             return PixelFormat::R11FG11FB10F;
+        case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
+            return PixelFormat::RGBA32UI;
         case Tegra::Texture::TextureFormat::DXT1:
             return PixelFormat::DXT1;
         case Tegra::Texture::TextureFormat::DXT23:
@@ -181,6 +188,8 @@ struct SurfaceParams {
             return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
         case PixelFormat::R11FG11FB10F:
             return Tegra::Texture::TextureFormat::BF10GF11RF11;
+        case PixelFormat::RGBA32UI:
+            return Tegra::Texture::TextureFormat::R32_G32_B32_A32;
         case PixelFormat::DXT1:
             return Tegra::Texture::TextureFormat::DXT1;
         case PixelFormat::DXT23:
@@ -217,6 +226,8 @@ struct SurfaceParams {
         case Tegra::RenderTargetFormat::RGBA16_FLOAT:
         case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
             return ComponentType::Float;
+        case Tegra::RenderTargetFormat::RGBA32_UINT:
+            return ComponentType::UInt;
         default:
             NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 0db4367f16..eaf15da328 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -65,6 +65,8 @@ u32 BytesPerPixel(TextureFormat format) {
         return 1;
     case TextureFormat::R16_G16_B16_A16:
         return 8;
+    case TextureFormat::R32_G32_B32_A32:
+        return 16;
     default:
         UNIMPLEMENTED_MSG("Format not implemented");
         break;
@@ -94,6 +96,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
     case TextureFormat::B5G6R5:
     case TextureFormat::R8:
     case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::R32_G32_B32_A32:
     case TextureFormat::BF10GF11RF11:
     case TextureFormat::ASTC_2D_4X4:
         CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
@@ -124,6 +127,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
     case TextureFormat::B5G6R5:
     case TextureFormat::R8:
     case TextureFormat::BF10GF11RF11:
+    case TextureFormat::R32_G32_B32_A32:
         // TODO(Subv): For the time being just forward the same data without any decoding.
         rgba_data = texture_data;
         break;

From f33e406ff28724cd64a945d97b1f2df051fe4881 Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Sat, 30 Jun 2018 14:48:25 -0500
Subject: [PATCH 4/4] GPU: Corrected the size of the MUFU subop field, and
 removed incorrect "min" operation.

---
 src/video_core/engines/shader_bytecode.h                | 3 +--
 src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 4 ----
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index cb4db06792..fcc0d32d94 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -165,7 +165,6 @@ enum class SubOp : u64 {
     Lg2 = 0x3,
     Rcp = 0x4,
     Rsq = 0x5,
-    Min = 0x8,
 };
 
 enum class F2iRoundingOp : u64 {
@@ -209,7 +208,7 @@ union Instruction {
     } pred;
     BitField<19, 1, u64> negate_pred;
     BitField<20, 8, Register> gpr20;
-    BitField<20, 7, SubOp> sub_op;
+    BitField<20, 4, SubOp> sub_op;
     BitField<28, 8, Register> gpr28;
     BitField<39, 8, Register> gpr39;
     BitField<48, 16, u64> opcode;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 46eaad0217..5a43d8e24a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -907,10 +907,6 @@ private:
                     regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
                                             instr.alu.saturate_d);
                     break;
-                case SubOp::Min:
-                    regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1,
-                                            instr.alu.saturate_d);
-                    break;
                 default:
                     NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
                                    static_cast<unsigned>(instr.sub_op.Value()));